diff --git a/.github/workflows/cli-build-instructions.yml b/.github/workflows/cli-build-instructions.yml index 3fd9e8e56..24d4dce1c 100644 --- a/.github/workflows/cli-build-instructions.yml +++ b/.github/workflows/cli-build-instructions.yml @@ -81,8 +81,14 @@ jobs: run: | mkdir -p Generated/BIN - - name: Run command - Build and Test - run: MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml package -DCLDR_DIR=$(cd ../cldr ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd Generated; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION + # Since these are just examples to smoke-test the in-source build process, + # let’s not run the whole build and test suite, which is quite slow (6 min + # 26 s as of this writing). Just run the invariant tests and smoke-test + # MakeUnicodeFiles. We don’t even check that MakeUnicodeFiles doesn’t + # change anything, which makes little sense; but that is the job of the + # other job. + - name: Run invariant tests + run: MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants -DfailIfNoTests=false -DCLDR_DIR=$(cd ../cldr ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd Generated; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -91,14 +97,15 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - out-of-source-build: - name: Out-of-source Instructions + + # Out-of-source build. 
+ ucd-and-smoke-tests: + name: Check UCD consistency, invariants, smoke-test generators runs-on: ubuntu-latest steps: - name: Checkout Unicode Tools uses: actions/checkout@v3 with: - repository: unicode-org/unicodetools path: unicodetools/mine/src - name: Get the CLDR_REF from pom.xml id: cldr_ref @@ -136,6 +143,30 @@ jobs: run: | mkdir -p unicodetools/mine/Generated/BIN + - name: Run command - Make Unicode Files + run: | + cd unicodetools/mine/src + mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCD.Main" -Dexec.args="version $CURRENT_UVERSION build MakeUnicodeFiles" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Check that UCD files are consistent + run: | + cd unicodetools/mine/src + ./py/copygenerateducd.py --out-of-source -y + git diff --compact-summary --exit-code || { + git diff --compact-summary | + awk '{ + if (previous) { + print "::error file="previous",title=File must be regenerated::Run org.unicode.text.UCD.Main build MakeUnicodeFiles and copy any changed files to unicodetools/data/ucd/dev." + } + previous=$1 + }' + exit 1 + } + + # Only test once we know the UCD is internally consistent. + # MakeUnicodeFiles is much faster than this anyway. 
- name: Run command - Build and Test run: | cd unicodetools/mine/src @@ -151,13 +182,6 @@ jobs: path: | unicodetools/mine/Generated/UnicodeTestResults.* - - name: Run command - Make Unicode Files - run: | - cd unicodetools/mine/src - mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCD.Main" -Dexec.args="version $CURRENT_UVERSION build MakeUnicodeFiles" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # https://github.com/unicode-org/unicodetools/blob/main/docs/emoji/aac.md#aacorderjava - name: Run command - AAC Order run: | @@ -166,18 +190,6 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # https://github.com/unicode-org/unicodetools/blob/main/docs/uca/index.md#tools--tests - # Note: Not running desuffixucd.py in UCA jobs because no version numbers detected in data file names - - name: Run command - UCA - collation validity log - run: | - cd unicodetools/mine/src - # invoke main() in class ...UCA.Main - mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCA.Main" -Dexec.args="writeCollationValidityLog ICU" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION - # check for output file - compgen -G "../Generated/UCA/*/CheckCollationValidity.html" - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # https://github.com/unicode-org/unicodetools/blob/main/docs/idna.md - name: Run command - IDNA run: | @@ -252,3 +264,61 @@ jobs: mvn -s .github/workflows/mvn-settings.xml -Dexec.mainClass="org.unicode.propstest.CheckProperties" -Dexec.classpathScope=test test-compile -Dexec.args="COMPARE ALL $PREVIOUS_UVERSION" compile exec:java -am -pl unicodetools -DCLDR_DIR=$(cd 
../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + # Out-of-source build. + uca: + name: Check UCA data + runs-on: ubuntu-latest + steps: + - name: Checkout Unicode Tools + uses: actions/checkout@v3 + with: + repository: unicode-org/unicodetools + path: unicodetools/mine/src + - name: Get the CLDR_REF from pom.xml + id: cldr_ref + run: echo "CLDR_REF="$(mvn --file unicodetools/mine/src/pom.xml help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d- -f3) >> $GITHUB_OUTPUT && cat ${GITHUB_OUTPUT} + - name: Verify CLDR checkout ref + run: echo CLDR_REF="${{ steps.cldr_ref.outputs.CLDR_REF }}" && [ "${{ steps.cldr_ref.outputs.CLDR_REF }}x" != "x" ] # fail if empty + - name: Cache CLDR repository + uses: actions/cache@v3 + with: + path: cldr/mine/src + key: cldr-${{ steps.cldr_ref.outputs.CLDR_REF }} + restore-keys: | + cldr + - name: Check out CLDR + uses: actions/checkout@v3 + with: + repository: unicode-org/cldr + path: cldr/mine/src + ref: main + fetch-depth: 0 + - name: Switch CLDR to CLDR_REF + run: cd cldr/mine/src && git fetch && git checkout ${{ steps.cldr_ref.outputs.CLDR_REF }} + - name: Set up JDK 11 + uses: actions/setup-java@v1 + with: + java-version: 11 + - name: Cache local Maven repository + uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + - name: Set up out-of-source output dir + run: | + mkdir -p unicodetools/mine/Generated/BIN + + # https://github.com/unicode-org/unicodetools/blob/main/docs/uca/index.md#tools--tests + # Note: Not running desuffixucd.py in UCA jobs because no version numbers detected in data file names + - name: Run command - UCA - collation validity log + run: | + cd unicodetools/mine/src + # invoke main() in class ...UCA.Main + mvn -s .github/workflows/mvn-settings.xml 
compile exec:java -Dexec.mainClass="org.unicode.text.UCA.Main" -Dexec.args="writeCollationValidityLog ICU" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION + # check for output file + compgen -G "../Generated/UCA/*/CheckCollationValidity.html" + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/UnicodeJsps/Dockerfile b/UnicodeJsps/Dockerfile index 1c1fdc76f..6d68d359c 100644 --- a/UnicodeJsps/Dockerfile +++ b/UnicodeJsps/Dockerfile @@ -3,7 +3,7 @@ FROM alpine as cbuild WORKDIR /build RUN apk add --update wget make gcc musl-dev ARG CPATH=https://www.unicode.org/Public/PROGRAMS/BidiReferenceC/ -ARG CVERSION=15.0.0 +ARG CVERSION=15.1.0 RUN wget -np -nv --reject-regex='.*\.(lib|exe)$' --cut-dirs=4 -nH -r ${CPATH}${CVERSION}/ RUN cd source && gcc -I ../include/ -static -Os -o3 -o bidiref1 bidiref1.c brutils.c brtest.c brtable.c brrule.c RUN ls -lh /build/source/bidiref1 && (/build/source/bidiref1 || true) diff --git a/UnicodeJsps/jetty.d/ROOT/robots.txt b/UnicodeJsps/jetty.d/ROOT/robots.txt new file mode 100644 index 000000000..a40ff93be --- /dev/null +++ b/UnicodeJsps/jetty.d/ROOT/robots.txt @@ -0,0 +1,2 @@ +User-agent: * +Disallow: /UnicodeJsps diff --git a/UnicodeJsps/pom.xml b/UnicodeJsps/pom.xml index 98f0e75b2..83d01106f 100644 --- a/UnicodeJsps/pom.xml +++ b/UnicodeJsps/pom.xml @@ -65,7 +65,7 @@ com.google.guava guava - 29.0-jre + 32.0.0-jre diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/IdentifierStatus.txt b/UnicodeJsps/src/main/resources/org/unicode/jsp/IdentifierStatus.txt index a1e389570..14541ac14 100644 --- a/UnicodeJsps/src/main/resources/org/unicode/jsp/IdentifierStatus.txt +++ b/UnicodeJsps/src/main/resources/org/unicode/jsp/IdentifierStatus.txt @@ -1,5 +1,5 @@ # IdentifierStatus.txt -# Date: 2023-05-16, 22:25:15 GMT +# Date: 2023-08-11, 17:46:41 GMT # © 2023 Unicode®, Inc. 
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -582,8 +582,8 @@ FA27..FA29 ; Allowed # 1.1 [3] CJK COMPATIBILITY IDEOGRAPH-FA27..CJK CO 2B740..2B81D ; Allowed # 6.0 [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; Allowed # 8.0 [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2CEB0..2EBE0 ; Allowed # 10.0 [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 -2EBF0..2EE4A ; Allowed # 15.1 [603] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE4A +2EBF0..2EE5D ; Allowed # 15.1 [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 30000..3134A ; Allowed # 13.0 [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Allowed # 15.0 [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 112759 +# Total code points: 112778 diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/IdentifierType.txt b/UnicodeJsps/src/main/resources/org/unicode/jsp/IdentifierType.txt index 59602a664..695156e6a 100644 --- a/UnicodeJsps/src/main/resources/org/unicode/jsp/IdentifierType.txt +++ b/UnicodeJsps/src/main/resources/org/unicode/jsp/IdentifierType.txt @@ -1,5 +1,5 @@ # IdentifierType.txt -# Date: 2023-05-16, 22:25:14 GMT +# Date: 2023-08-11, 17:46:40 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use, see https://www.unicode.org/terms_of_use.html @@ -576,11 +576,11 @@ FA27..FA29 ; Recommended # 1.1 [3] CJK COMPATIBILITY ID 2B740..2B81D ; Recommended # 6.0 [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; Recommended # 8.0 [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2CEB0..2EBE0 ; Recommended # 10.0 [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 -2EBF0..2EE4A ; Recommended # 15.1 [603] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE4A +2EBF0..2EE5D ; Recommended # 15.1 [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 30000..3134A ; Recommended # 13.0 [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Recommended # 15.0 [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 112742 +# Total code points: 112761 # Identifier_Type: Inclusion diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/IdnaMappingTable.txt b/UnicodeJsps/src/main/resources/org/unicode/jsp/IdnaMappingTable.txt index 0ef35b90e..3fb44638b 100644 --- a/UnicodeJsps/src/main/resources/org/unicode/jsp/IdnaMappingTable.txt +++ b/UnicodeJsps/src/main/resources/org/unicode/jsp/IdnaMappingTable.txt @@ -1,5 +1,5 @@ # IdnaMappingTable.txt -# Date: 2023-05-15, 22:37:02 GMT +# Date: 2023-08-10, 22:32:27 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2036,7 +2036,7 @@ 1E9A ; mapped ; 0061 02BE # 1.1 LATIN SMALL LETTER A WITH RIGHT HALF RING 1E9B ; mapped ; 1E61 # 2.0 LATIN SMALL LETTER LONG S WITH DOT ABOVE 1E9C..1E9D ; valid # 5.1 LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE..LATIN SMALL LETTER LONG S WITH HIGH STROKE -1E9E ; mapped ; 0073 0073 # 5.1 LATIN CAPITAL LETTER SHARP S +1E9E ; mapped ; 00DF # 5.1 LATIN CAPITAL LETTER SHARP S 1E9F ; valid # 5.1 LATIN SMALL LETTER DELTA 1EA0 ; mapped ; 1EA1 # 1.1 LATIN CAPITAL LETTER A WITH DOT BELOW 1EA1 ; valid # 1.1 LATIN SMALL LETTER A WITH DOT BELOW @@ -3422,7 +3422,7 @@ 31C0..31CF ; valid ; ; NV8 # 4.1 CJK STROKE T..CJK STROKE N 31D0..31E3 ; valid ; ; NV8 # 5.1 CJK STROKE H..CJK STROKE Q 31E4..31EE ; disallowed # NA .. -31EF ; valid ; ; NV8 # 15.1 IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION +31EF ; disallowed # 15.1 IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION 31F0..31FF ; valid # 3.2 KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3200 ; disallowed_STD3_mapped ; 0028 1100 0029 #1.1 PARENTHESIZED HANGUL KIYEOK 3201 ; disallowed_STD3_mapped ; 0028 1102 0029 #1.1 PARENTHESIZED HANGUL NIEUN @@ -8448,8 +8448,8 @@ FFFE..FFFF ; disallowed # 1.1 .. 2CEB0..2EBE0 ; valid # 10.0 CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 2EBE1..2EBEF ; disallowed # NA .. -2EBF0..2EE4A ; valid # 15.1 CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE4A -2EE4B..2F7FF ; disallowed # NA .. +2EBF0..2EE5D ; valid # 15.1 CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D +2EE5E..2F7FF ; disallowed # NA .. 
2F800 ; mapped ; 4E3D # 3.1 CJK COMPATIBILITY IDEOGRAPH-2F800 2F801 ; mapped ; 4E38 # 3.1 CJK COMPATIBILITY IDEOGRAPH-2F801 2F802 ; mapped ; 4E41 # 3.1 CJK COMPATIBILITY IDEOGRAPH-2F802 diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/NamesList.txt b/UnicodeJsps/src/main/resources/org/unicode/jsp/NamesList.txt index eb3ed3c8a..4dd0fb39d 100644 --- a/UnicodeJsps/src/main/resources/org/unicode/jsp/NamesList.txt +++ b/UnicodeJsps/src/main/resources/org/unicode/jsp/NamesList.txt @@ -1,10 +1,7 @@ ; charset=UTF-8 @@@ The Unicode Standard 15.1.0 -@@@+ U15M230512.lst - Unicode 15.1.0 names list, seventh delta. - Repertoire synched with UnicodeData-15.1.0d2.txt. - Tweak use of notices to suppress year expansions. - Update annotations for 06F4..06F7. +@@@+ U15M230728.lst + Unicode 15.1.0 final names list. This file is semi-automatically derived from UnicodeData.txt and a set of manually created annotations using a script to select or suppress information from the data file. The rules used @@ -2429,7 +2426,8 @@ = apostrophe * glottal stop, glottalization, ejective * many languages use this as a letter of their alphabets - * used as a tone marker in Bodo, Dogri, and Maithili + * used as a tone marker in Bodo and Dogri + * indicates vowel elongation, or various truncations and ellipsis in Maithili * used as a modifier letter in the Lisu script * 2019 is the preferred character for a punctuation apostrophe x (apostrophe - 0027) @@ -4456,7 +4454,8 @@ * Uyghur, Kazakh 06CC ARABIC LETTER FARSI YEH * Arabic, Persian, Urdu, Kashmiri, ... 
- * initial and medial forms of this letter have dots + * initial and medial forms of this letter have two horizontal dots below + * retains its dots in initial and medial forms when used in combination with 0654 x (arabic letter alef maksura - 0649) x (arabic letter yeh - 064A) 06CD ARABIC LETTER YEH WITH TAIL @@ -19522,8 +19521,10 @@ * indicates pronunciation on one side of the mouth only x (section sign - 00A7) @ Dashes +@+ These long dashes are shown in the code charts inside dashed square boxes because of their width. In production fonts they would simply display as extra-wide dashes. 2E3A TWO-EM DASH = omission dash + * may be used in Chinese for abrupt change of thought, inserting new content, or continuation of tone or sound x (em dash - 2014) 2E3B THREE-EM DASH @ Alternate forms of punctuation @@ -26618,7 +26619,7 @@ D7FB HANGUL JONGSEONG PHIEUPH-THIEUTH @@+ @+ This block, despite its name, contains a number of unified CJK ideographs. Each is also individually identified by an annotation. @+ Subheaders identifying sources for subranges do not indicate required usage or preclude mappings to other sources. For example, many pronunciation variants from KS X 1001:1998 are also mapped to a J source. -@ Pronunciation variants from KS X 1001:1998 +@ Pronunciation variants from KS X 1001:1998 F900 CJK COMPATIBILITY IDEOGRAPH-F900 : 8C48 F901 CJK COMPATIBILITY IDEOGRAPH-F901 @@ -44024,6 +44025,7 @@ FFFF 1BC01 DUPLOYAN LETTER X * Salishan @ Line consonants +@+ Small arrows shown in the chart glyphs for some strokes and arcs indicate the handwriting direction of these characters, and are not visibly rendered. 
1BC02 DUPLOYAN LETTER P * Chinook number 1 1BC03 DUPLOYAN LETTER T @@ -54200,9 +54202,9 @@ FFFF @@ 2B740 CJK Unified Ideographs Extension D 2B81D @@ 2B820 CJK Unified Ideographs Extension E 2CEA1 @@ 2CEB0 CJK Unified Ideographs Extension F 2EBE0 -@@ 2EBF0 CJK Unified Ideographs Extension I 2EE4A +@@ 2EBF0 CJK Unified Ideographs Extension I 2EE5D @@ 2F800 CJK Compatibility Ideographs Supplement 2FA1F -@ Duplicate characters from CNS 11643-1992 +@ Duplicate characters from CNS 11643-1992 2F800 CJK COMPATIBILITY IDEOGRAPH-2F800 : 4E3D 2F801 CJK COMPATIBILITY IDEOGRAPH-2F801 diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/confusables.txt b/UnicodeJsps/src/main/resources/org/unicode/jsp/confusables.txt index 9b39c97b9..cf73eca00 100644 --- a/UnicodeJsps/src/main/resources/org/unicode/jsp/confusables.txt +++ b/UnicodeJsps/src/main/resources/org/unicode/jsp/confusables.txt @@ -1,5 +1,5 @@ # confusables.txt -# Date: 2023-05-16, 22:25:14 GMT +# Date: 2023-08-11, 17:46:40 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -349,8 +349,8 @@ A4FA ; 002E 002E ; MA # ( ꓺ → .. 
) LISU LETTER TONE MYA CYA → FULL STOP, F A6F4 ; A6F3 A6F3 ; MA #* ( ꛴ → ꛳꛳ ) BAMUM COLON → BAMUM FULL STOP, BAMUM FULL STOP # -30FB ; 00B7 ; MA #* ( ・ → · ) KATAKANA MIDDLE DOT → MIDDLE DOT # →•→ -FF65 ; 00B7 ; MA #* ( ・ → · ) HALFWIDTH KATAKANA MIDDLE DOT → MIDDLE DOT # →•→ +30FB ; 00B7 ; MA # ( ・ → · ) KATAKANA MIDDLE DOT → MIDDLE DOT # →•→ +FF65 ; 00B7 ; MA # ( ・ → · ) HALFWIDTH KATAKANA MIDDLE DOT → MIDDLE DOT # →•→ 16EB ; 00B7 ; MA #* ( ᛫ → · ) RUNIC SINGLE PUNCTUATION → MIDDLE DOT # 0387 ; 00B7 ; MA # ( · → · ) GREEK ANO TELEIA → MIDDLE DOT # 2E31 ; 00B7 ; MA #* ( ⸱ → · ) WORD SEPARATOR MIDDLE DOT → MIDDLE DOT # diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/data/PropertyAliases.txt b/UnicodeJsps/src/main/resources/org/unicode/jsp/data/PropertyAliases.txt index a017cd2a2..686b25ab7 100644 --- a/UnicodeJsps/src/main/resources/org/unicode/jsp/data/PropertyAliases.txt +++ b/UnicodeJsps/src/main/resources/org/unicode/jsp/data/PropertyAliases.txt @@ -1,5 +1,5 @@ # PropertyAliases-15.1.0.txt -# Date: 2023-03-23, 00:36:58 GMT +# Date: 2023-08-07, 15:21:34 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use, see https://www.unicode.org/terms_of_use.html @@ -122,6 +122,7 @@ ea ; East_Asian_Width gc ; General_Category GCB ; Grapheme_Cluster_Break hst ; Hangul_Syllable_Type +InCB ; Indic_Conjunct_Break InPC ; Indic_Positional_Category InSC ; Indic_Syllabic_Category jg ; Joining_Group @@ -211,6 +212,6 @@ XO_NFKC ; Expands_On_NFKC XO_NFKD ; Expands_On_NFKD # ================================================ -# Total: 133 +# Total: 134 # EOF diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/data/PropertyValueAliases.txt b/UnicodeJsps/src/main/resources/org/unicode/jsp/data/PropertyValueAliases.txt index 9d1400e40..17ffec935 100644 --- a/UnicodeJsps/src/main/resources/org/unicode/jsp/data/PropertyValueAliases.txt +++ b/UnicodeJsps/src/main/resources/org/unicode/jsp/data/PropertyValueAliases.txt @@ -1,5 +1,5 @@ -# PropertyValueAliases-15.1.0.txt -# Date: 2023-05-10, 16:59:10 GMT +# PropertyValueAliases-16.0.0.txt +# Date: 2023-10-17, 12:29:15 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use, see https://www.unicode.org/terms_of_use.html @@ -92,6 +92,7 @@ age; 13.0 ; V13_0 age; 14.0 ; V14_0 age; 15.0 ; V15_0 age; 15.1 ; V15_1 +age; 16.0 ; V16_0 age; NA ; Unassigned # Alphabetic (Alpha) @@ -367,6 +368,7 @@ blk; Music ; Musical_Symbols blk; Myanmar ; Myanmar blk; Myanmar_Ext_A ; Myanmar_Extended_A blk; Myanmar_Ext_B ; Myanmar_Extended_B +blk; Myanmar_Ext_C ; Myanmar_Extended_C blk; Nabataean ; Nabataean blk; Nag_Mundari ; Nag_Mundari blk; Nandinagari ; Nandinagari @@ -425,6 +427,7 @@ blk; Soyombo ; Soyombo blk; Specials ; Specials blk; Sundanese ; Sundanese blk; Sundanese_Sup ; Sundanese_Supplement +blk; Sunuwar ; Sunuwar blk; Sup_Arrows_A ; Supplemental_Arrows_A blk; Sup_Arrows_B ; Supplemental_Arrows_B blk; Sup_Arrows_C ; Supplemental_Arrows_C @@ -460,6 +463,7 @@ blk; Thai ; Thai blk; Tibetan ; Tibetan blk; Tifinagh ; Tifinagh blk; Tirhuta ; Tirhuta +blk; Todhri ; Todhri blk; Toto ; Toto blk; Transport_And_Map ; Transport_And_Map_Symbols blk; UCAS ; Unified_Canadian_Aboriginal_Syllabics; Canadian_Syllabics @@ -853,6 +857,13 @@ IDS; Y ; Yes ; T Ideo; N ; No ; F ; False Ideo; Y ; Yes ; T ; True +# Indic_Conjunct_Break (InCB) + +InCB; Consonant ; Consonant +InCB; Extend ; Extend +InCB; Linker ; Linker +InCB; None ; None + # Indic_Positional_Category (InPC) InPC; Bottom ; Bottom @@ -1416,6 +1427,7 @@ sc ; Sogo ; Old_Sogdian sc ; Sora ; Sora_Sompeng sc ; Soyo ; Soyombo sc ; Sund ; Sundanese +sc ; Sunu ; Sunuwar sc ; Sylo ; Syloti_Nagri sc ; Syrc ; Syriac sc ; Tagb ; Tagbanwa @@ -1433,6 +1445,7 @@ sc ; Thai ; Thai sc ; Tibt ; Tibetan sc ; Tirh ; Tirhuta sc ; Tnsa ; Tangsa +sc ; Todr ; Todhri sc ; Toto ; Toto sc ; Ugar ; Ugaritic sc ; Vaii ; Vai diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/emoji-sequences.txt b/UnicodeJsps/src/main/resources/org/unicode/jsp/emoji-sequences.txt index 47ff90bd2..dfeae158e 100644 --- a/UnicodeJsps/src/main/resources/org/unicode/jsp/emoji-sequences.txt +++ 
b/UnicodeJsps/src/main/resources/org/unicode/jsp/emoji-sequences.txt @@ -1,5 +1,5 @@ # emoji-sequences.txt -# Date: 2023-05-05, 23:24:34 GMT +# Date: 2023-06-05, 21:39:54 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -784,7 +784,7 @@ 1F1F9 1F1F2 ; RGI_Emoji_Flag_Sequence ; flag: Turkmenistan # E2.0 [1] (🇹🇲) 1F1F9 1F1F3 ; RGI_Emoji_Flag_Sequence ; flag: Tunisia # E2.0 [1] (🇹🇳) 1F1F9 1F1F4 ; RGI_Emoji_Flag_Sequence ; flag: Tonga # E2.0 [1] (🇹🇴) -1F1F9 1F1F7 ; RGI_Emoji_Flag_Sequence ; flag: Turkey # E2.0 [1] (🇹🇷) +1F1F9 1F1F7 ; RGI_Emoji_Flag_Sequence ; flag: Türkiye # E2.0 [1] (🇹🇷) 1F1F9 1F1F9 ; RGI_Emoji_Flag_Sequence ; flag: Trinidad & Tobago # E2.0 [1] (🇹🇹) 1F1F9 1F1FB ; RGI_Emoji_Flag_Sequence ; flag: Tuvalu # E2.0 [1] (🇹🇻) 1F1F9 1F1FC ; RGI_Emoji_Flag_Sequence ; flag: Taiwan # E2.0 [1] (🇹🇼) diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/emoji-zwj-sequences.txt b/UnicodeJsps/src/main/resources/org/unicode/jsp/emoji-zwj-sequences.txt index b77027aa5..25f8b6154 100644 --- a/UnicodeJsps/src/main/resources/org/unicode/jsp/emoji-zwj-sequences.txt +++ b/UnicodeJsps/src/main/resources/org/unicode/jsp/emoji-zwj-sequences.txt @@ -1,5 +1,5 @@ # emoji-zwj-sequences.txt -# Date: 2023-05-03, 23:48:43 GMT +# Date: 2023-06-05, 20:04:50 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use, see https://www.unicode.org/terms_of_use.html @@ -365,6 +365,12 @@ # RGI_Emoji_ZWJ_Sequence: Role +1F3C3 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right # E15.1 [1] (🏃‍➡️) +1F3C3 1F3FB 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right: light skin tone # E15.1 [1] (🏃🏻‍➡️) +1F3C3 1F3FC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right: medium-light skin tone # E15.1 [1] (🏃🏼‍➡️) +1F3C3 1F3FD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right: medium skin tone # E15.1 [1] (🏃🏽‍➡️) +1F3C3 1F3FE 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right: medium-dark skin tone # E15.1 [1] (🏃🏾‍➡️) +1F3C3 1F3FF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right: dark skin tone # E15.1 [1] (🏃🏿‍➡️) 1F468 200D 2695 FE0F ; RGI_Emoji_ZWJ_Sequence ; man health worker # E4.0 [1] (👨‍⚕️) 1F468 200D 2696 FE0F ; RGI_Emoji_ZWJ_Sequence ; man judge # E4.0 [1] (👨‍⚖️) 1F468 200D 2708 FE0F ; RGI_Emoji_ZWJ_Sequence ; man pilot # E4.0 [1] (👨‍✈️) @@ -641,6 +647,18 @@ 1F469 1F3FF 200D 1F9BC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in motorized wheelchair facing right: dark skin tone # E15.1 [1] (👩🏿‍🦼‍➡️) 1F469 1F3FF 200D 1F9BD ; RGI_Emoji_ZWJ_Sequence ; woman in manual wheelchair: dark skin tone # E12.0 [1] (👩🏿‍🦽) 1F469 1F3FF 200D 1F9BD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in manual wheelchair facing right: dark skin tone # E15.1 [1] (👩🏿‍🦽‍➡️) +1F6B6 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right # E15.1 [1] (🚶‍➡️) +1F6B6 1F3FB 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right: light skin tone # E15.1 [1] (🚶🏻‍➡️) +1F6B6 1F3FC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right: medium-light skin tone # E15.1 [1] (🚶🏼‍➡️) +1F6B6 1F3FD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right: medium skin tone # E15.1 [1] (🚶🏽‍➡️) +1F6B6 1F3FE 200D 27A1 FE0F ; 
RGI_Emoji_ZWJ_Sequence ; person walking facing right: medium-dark skin tone # E15.1 [1] (🚶🏾‍➡️) +1F6B6 1F3FF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right: dark skin tone # E15.1 [1] (🚶🏿‍➡️) +1F9CE 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right # E15.1 [1] (🧎‍➡️) +1F9CE 1F3FB 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right: light skin tone # E15.1 [1] (🧎🏻‍➡️) +1F9CE 1F3FC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right: medium-light skin tone # E15.1 [1] (🧎🏼‍➡️) +1F9CE 1F3FD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right: medium skin tone # E15.1 [1] (🧎🏽‍➡️) +1F9CE 1F3FE 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right: medium-dark skin tone # E15.1 [1] (🧎🏾‍➡️) +1F9CE 1F3FF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right: dark skin tone # E15.1 [1] (🧎🏿‍➡️) 1F9D1 200D 2695 FE0F ; RGI_Emoji_ZWJ_Sequence ; health worker # E12.1 [1] (🧑‍⚕️) 1F9D1 200D 2696 FE0F ; RGI_Emoji_ZWJ_Sequence ; judge # E12.1 [1] (🧑‍⚖️) 1F9D1 200D 2708 FE0F ; RGI_Emoji_ZWJ_Sequence ; pilot # E12.1 [1] (🧑‍✈️) @@ -786,7 +804,7 @@ 1F9D1 1F3FF 200D 1F9BD ; RGI_Emoji_ZWJ_Sequence ; person in manual wheelchair: dark skin tone # E12.1 [1] (🧑🏿‍🦽) 1F9D1 1F3FF 200D 1F9BD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person in manual wheelchair facing right: dark skin tone # E15.1 [1] (🧑🏿‍🦽‍➡️) -# Total elements: 420 +# Total elements: 438 # ================================================ @@ -1491,12 +1509,6 @@ 2764 FE0F 200D 1FA79 ; RGI_Emoji_ZWJ_Sequence ; mending heart # E13.1 [1] (❤️‍🩹) 1F344 200D 1F7EB ; RGI_Emoji_ZWJ_Sequence ; brown mushroom # E15.1 [1] (🍄‍🟫) 1F34B 200D 1F7E9 ; RGI_Emoji_ZWJ_Sequence ; lime # E15.1 [1] (🍋‍🟩) -1F3C3 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right # E15.1 [1] (🏃‍➡️) -1F3C3 1F3FB 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right: light skin tone # E15.1 [1] (🏃🏻‍➡️) 
-1F3C3 1F3FC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right: medium-light skin tone # E15.1 [1] (🏃🏼‍➡️) -1F3C3 1F3FD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right: medium skin tone # E15.1 [1] (🏃🏽‍➡️) -1F3C3 1F3FE 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right: medium-dark skin tone # E15.1 [1] (🏃🏾‍➡️) -1F3C3 1F3FF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right: dark skin tone # E15.1 [1] (🏃🏿‍➡️) 1F3F3 FE0F 200D 26A7 FE0F ; RGI_Emoji_ZWJ_Sequence ; transgender flag # E13.0 [1] (🏳️‍⚧️) 1F3F3 FE0F 200D 1F308 ; RGI_Emoji_ZWJ_Sequence ; rainbow flag # E4.0 [1] (🏳️‍🌈) 1F3F4 200D 2620 FE0F ; RGI_Emoji_ZWJ_Sequence ; pirate flag # E11.0 [1] (🏴‍☠️) @@ -1511,19 +1523,7 @@ 1F636 200D 1F32B FE0F ; RGI_Emoji_ZWJ_Sequence ; face in clouds # E13.1 [1] (😶‍🌫️) 1F642 200D 2194 FE0F ; RGI_Emoji_ZWJ_Sequence ; head shaking horizontally # E15.1 [1] (🙂‍↔️) 1F642 200D 2195 FE0F ; RGI_Emoji_ZWJ_Sequence ; head shaking vertically # E15.1 [1] (🙂‍↕️) -1F6B6 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right # E15.1 [1] (🚶‍➡️) -1F6B6 1F3FB 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right: light skin tone # E15.1 [1] (🚶🏻‍➡️) -1F6B6 1F3FC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right: medium-light skin tone # E15.1 [1] (🚶🏼‍➡️) -1F6B6 1F3FD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right: medium skin tone # E15.1 [1] (🚶🏽‍➡️) -1F6B6 1F3FE 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right: medium-dark skin tone # E15.1 [1] (🚶🏾‍➡️) -1F6B6 1F3FF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right: dark skin tone # E15.1 [1] (🚶🏿‍➡️) -1F9CE 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right # E15.1 [1] (🧎‍➡️) -1F9CE 1F3FB 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right: light skin tone # E15.1 [1] (🧎🏻‍➡️) -1F9CE 1F3FC 200D 27A1 FE0F ; 
RGI_Emoji_ZWJ_Sequence ; person kneeling facing right: medium-light skin tone # E15.1 [1] (🧎🏼‍➡️) -1F9CE 1F3FD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right: medium skin tone # E15.1 [1] (🧎🏽‍➡️) -1F9CE 1F3FE 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right: medium-dark skin tone # E15.1 [1] (🧎🏾‍➡️) -1F9CE 1F3FF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right: dark skin tone # E15.1 [1] (🧎🏿‍➡️) -# Total elements: 37 +# Total elements: 19 #EOF diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ASCII_Hex_Digit.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ASCII_Hex_Digit.bin index 6d9381a38..0fb19403d 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ASCII_Hex_Digit.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ASCII_Hex_Digit.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Age.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Age.bin index 3292dc8b6..f7bf20fb1 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Age.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Age.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Alphabetic.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Alphabetic.bin index c251eff2f..8af2cc103 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Alphabetic.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Alphabetic.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Basic_Emoji.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Basic_Emoji.bin index d7fcaff71..4d154dd0c 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Basic_Emoji.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Basic_Emoji.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Class.bin 
b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Class.bin index e88f2935c..37621bfe1 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Class.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Class.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Control.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Control.bin index dc9c71eb2..6de05489a 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Control.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Control.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Mirrored.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Mirrored.bin index e9434182e..934e70c08 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Mirrored.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Mirrored.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Mirroring_Glyph.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Mirroring_Glyph.bin index 3855f5801..7ecbe9121 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Mirroring_Glyph.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Mirroring_Glyph.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Paired_Bracket.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Paired_Bracket.bin index e7b31e1e1..137ea206c 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Paired_Bracket.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Paired_Bracket.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Paired_Bracket_Type.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Paired_Bracket_Type.bin index e6a1df615..d0089782a 100644 Binary files 
a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Paired_Bracket_Type.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Paired_Bracket_Type.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Block.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Block.bin index 1bb2becdc..88547522d 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Block.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Block.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/CJK_Radical.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/CJK_Radical.bin index f83439e80..5517f1a4a 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/CJK_Radical.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/CJK_Radical.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Canonical_Combining_Class.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Canonical_Combining_Class.bin index 59dd36151..eef1a6a23 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Canonical_Combining_Class.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Canonical_Combining_Class.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Case_Folding.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Case_Folding.bin index 24c4af826..75420c338 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Case_Folding.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Case_Folding.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Case_Ignorable.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Case_Ignorable.bin index cedc1b19a..b15729670 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Case_Ignorable.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Case_Ignorable.bin differ diff 
--git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Cased.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Cased.bin index c55c32261..348a73927 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Cased.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Cased.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Casefolded.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Casefolded.bin index ba81da82d..d4933d2bc 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Casefolded.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Casefolded.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Casemapped.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Casemapped.bin index 0ca22e389..384480e97 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Casemapped.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Casemapped.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Lowercased.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Lowercased.bin index 8ca4a6977..56a0dac25 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Lowercased.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Lowercased.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_NFKC_Casefolded.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_NFKC_Casefolded.bin index cd5786a94..f04e5f87d 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_NFKC_Casefolded.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_NFKC_Casefolded.bin differ diff --git 
a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Titlecased.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Titlecased.bin index c0bb98e5e..e74e1a354 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Titlecased.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Titlecased.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Uppercased.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Uppercased.bin index c1e7d7f4b..35aaa0146 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Uppercased.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Uppercased.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Confusable_MA.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Confusable_MA.bin index e190e2ebb..dec36868e 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Confusable_MA.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Confusable_MA.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Dash.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Dash.bin index ebdf44a58..29ef054dd 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Dash.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Dash.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Decomposition_Type.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Decomposition_Type.bin index 76091a7f6..286271390 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Decomposition_Type.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Decomposition_Type.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Default_Ignorable_Code_Point.bin 
b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Default_Ignorable_Code_Point.bin index 1df2d5651..18e77589f 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Default_Ignorable_Code_Point.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Default_Ignorable_Code_Point.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Deprecated.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Deprecated.bin index e86c031f2..ae503185d 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Deprecated.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Deprecated.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Diacritic.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Diacritic.bin index 9dbaef364..dab88d216 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Diacritic.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Diacritic.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/East_Asian_Width.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/East_Asian_Width.bin index 9cf9fa3c0..8fabd01c8 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/East_Asian_Width.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/East_Asian_Width.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji.bin index ce4fe107d..5e5c2ba7f 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Component.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Component.bin index eeefc28e0..729ad90d5 100644 Binary files 
a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Component.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Component.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Modifier.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Modifier.bin index 6d31126f9..f62383515 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Modifier.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Modifier.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Modifier_Base.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Modifier_Base.bin index 465a00ac4..7d95f9ed4 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Modifier_Base.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Modifier_Base.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Presentation.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Presentation.bin index 7428526e3..2d0406425 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Presentation.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Presentation.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Equivalent_Unified_Ideograph.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Equivalent_Unified_Ideograph.bin index 84506b33f..d84cdd85e 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Equivalent_Unified_Ideograph.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Equivalent_Unified_Ideograph.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Extended_Pictographic.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Extended_Pictographic.bin index 7baaa0cec..caf44d10e 100644 Binary files 
a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Extended_Pictographic.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Extended_Pictographic.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Extender.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Extender.bin index c27fbbabf..40ffcc2be 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Extender.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Extender.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/General_Category.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/General_Category.bin index 65d881f91..17e647008 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/General_Category.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/General_Category.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Grapheme_Cluster_Break.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Grapheme_Cluster_Break.bin index 9bfa37e40..61c514bbe 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Grapheme_Cluster_Break.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Grapheme_Cluster_Break.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Hangul_Syllable_Type.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Hangul_Syllable_Type.bin index 201075e99..44df46ac0 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Hangul_Syllable_Type.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Hangul_Syllable_Type.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Hex_Digit.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Hex_Digit.bin index 17ad6aac7..b81092202 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Hex_Digit.bin and 
b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Hex_Digit.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Binary_Operator.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Binary_Operator.bin index 1c51dc1dc..ce7b5f11d 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Binary_Operator.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Binary_Operator.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Trinary_Operator.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Trinary_Operator.bin index 23241a1c5..834d072a3 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Trinary_Operator.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Trinary_Operator.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Unary_Operator.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Unary_Operator.bin index e3e0a3db2..a36d5af43 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Unary_Operator.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Unary_Operator.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Compat_Math_Continue.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Compat_Math_Continue.bin index 76b8f60ff..aa6da3679 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Compat_Math_Continue.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Compat_Math_Continue.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Compat_Math_Start.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Compat_Math_Start.bin index 631ecc65a..cd83a7281 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Compat_Math_Start.bin and 
b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Compat_Math_Start.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Continue.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Continue.bin index 56d5ddf57..f5cef05d6 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Continue.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Continue.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Start.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Start.bin index dbdd4db1d..2f07fb0b0 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Start.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Start.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Identifier_Status.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Identifier_Status.bin index 512a30281..c56ef07c6 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Identifier_Status.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Identifier_Status.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Identifier_Type.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Identifier_Type.bin index 4d550dc35..8d0aa06e9 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Identifier_Type.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Identifier_Type.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Ideographic.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Ideographic.bin index 53781931a..0cd1cf695 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Ideographic.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Ideographic.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_2008.bin 
b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_2008.bin index d60616f7c..8e70e6047 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_2008.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_2008.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_Mapping.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_Mapping.bin index 5438c6c48..0ef025ca5 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_Mapping.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_Mapping.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_Status.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_Status.bin index 2df353de6..aa7f3b76a 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_Status.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_Status.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Conjunct_Break.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Conjunct_Break.bin new file mode 100644 index 000000000..2838521f6 Binary files /dev/null and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Conjunct_Break.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Positional_Category.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Positional_Category.bin index f8668334e..49ef70a06 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Positional_Category.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Positional_Category.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Syllabic_Category.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Syllabic_Category.bin index 8e3087194..4ce2cb93c 100644 Binary files 
a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Syllabic_Category.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Syllabic_Category.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Join_Control.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Join_Control.bin index a8dd5d2d1..040ae01c2 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Join_Control.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Join_Control.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Joining_Group.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Joining_Group.bin index c7d614b40..dc65d1bc3 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Joining_Group.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Joining_Group.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Joining_Type.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Joining_Type.bin index fe621ba68..429d67aea 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Joining_Type.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Joining_Type.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Line_Break.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Line_Break.bin index 816aff4eb..8b5bbe482 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Line_Break.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Line_Break.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Logical_Order_Exception.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Logical_Order_Exception.bin index 9c4ef39df..73536b47b 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Logical_Order_Exception.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Logical_Order_Exception.bin 
differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Lowercase.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Lowercase.bin index 7954f0240..56eef5bbe 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Lowercase.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Lowercase.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Lowercase_Mapping.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Lowercase_Mapping.bin index caec127c9..f1955976b 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Lowercase_Mapping.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Lowercase_Mapping.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Math.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Math.bin index 6ccd13ae9..69ceca418 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Math.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Math.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFC_Quick_Check.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFC_Quick_Check.bin index bf2f51d09..6225ed7dd 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFC_Quick_Check.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFC_Quick_Check.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFD_Quick_Check.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFD_Quick_Check.bin index ed20d5132..fc203843d 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFD_Quick_Check.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFD_Quick_Check.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Casefold.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Casefold.bin index 9c6390944..9174330a0 100644 Binary files 
a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Casefold.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Casefold.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Quick_Check.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Quick_Check.bin index 82620bbc6..89b2c843d 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Quick_Check.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Quick_Check.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Simple_Casefold.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Simple_Casefold.bin index 837289e73..85741e5f3 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Simple_Casefold.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Simple_Casefold.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKD_Quick_Check.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKD_Quick_Check.bin index 1b4b1fd76..4cb7d0b09 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKD_Quick_Check.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKD_Quick_Check.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Name.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Name.bin index fcb708d38..935cbeed0 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Name.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Name.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Name_Alias.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Name_Alias.bin index 5b8d2bdb4..73066193e 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Name_Alias.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Name_Alias.bin differ diff --git 
a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Named_Sequences.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Named_Sequences.bin index 5b33a6379..a251ddc66 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Named_Sequences.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Named_Sequences.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Named_Sequences_Prov.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Named_Sequences_Prov.bin index ddc2e857e..6c063aae0 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Named_Sequences_Prov.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Named_Sequences_Prov.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Noncharacter_Code_Point.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Noncharacter_Code_Point.bin index 8708b6702..4467e672c 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Noncharacter_Code_Point.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Noncharacter_Code_Point.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Numeric_Type.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Numeric_Type.bin index c8fc8557d..bbb617664 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Numeric_Type.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Numeric_Type.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Numeric_Value.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Numeric_Value.bin index 1a399339b..1bd685159 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Numeric_Value.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Numeric_Value.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Pattern_Syntax.bin 
b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Pattern_Syntax.bin index 35752482b..a9ac908b4 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Pattern_Syntax.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Pattern_Syntax.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Pattern_White_Space.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Pattern_White_Space.bin index 643b748b1..d0e3d2ebf 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Pattern_White_Space.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Pattern_White_Space.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Prepended_Concatenation_Mark.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Prepended_Concatenation_Mark.bin index 532e7db79..a00dd8596 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Prepended_Concatenation_Mark.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Prepended_Concatenation_Mark.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Quotation_Mark.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Quotation_Mark.bin index 2b2cd7f93..eeb416a94 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Quotation_Mark.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Quotation_Mark.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Flag_Sequence.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Flag_Sequence.bin index d9d1c1446..d3e745a71 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Flag_Sequence.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Flag_Sequence.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Keycap_Sequence.bin 
b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Keycap_Sequence.bin index c73a94837..bf74bcf58 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Keycap_Sequence.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Keycap_Sequence.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Modifier_Sequence.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Modifier_Sequence.bin index 30741d343..32f046a03 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Modifier_Sequence.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Modifier_Sequence.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Tag_Sequence.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Tag_Sequence.bin index bf8902d63..3705d0d36 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Tag_Sequence.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Tag_Sequence.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Zwj_Sequence.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Zwj_Sequence.bin index 583b46e04..e3530b6be 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Zwj_Sequence.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Zwj_Sequence.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Radical.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Radical.bin index 6d4153060..8d6ae3276 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Radical.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Radical.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Regional_Indicator.bin 
b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Regional_Indicator.bin index 4a8f4bbad..d40257170 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Regional_Indicator.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Regional_Indicator.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Script.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Script.bin index bf8bfb5aa..6457f9a34 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Script.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Script.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Script_Extensions.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Script_Extensions.bin index 963f82fab..f9944df5c 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Script_Extensions.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Script_Extensions.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Sentence_Break.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Sentence_Break.bin index 376d05fdf..e89e68021 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Sentence_Break.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Sentence_Break.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Sentence_Terminal.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Sentence_Terminal.bin index 78f98f66a..bd8b5eb6c 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Sentence_Terminal.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Sentence_Terminal.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Case_Folding.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Case_Folding.bin index 3a8507a9d..1bbb982c9 100644 Binary files 
a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Case_Folding.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Case_Folding.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Lowercase_Mapping.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Lowercase_Mapping.bin index 1fd4e9425..01fa821d2 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Lowercase_Mapping.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Lowercase_Mapping.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Titlecase_Mapping.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Titlecase_Mapping.bin index a5e54a321..0b89554b2 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Titlecase_Mapping.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Titlecase_Mapping.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Uppercase_Mapping.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Uppercase_Mapping.bin index 52026e7b9..c36a7bbe0 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Uppercase_Mapping.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Uppercase_Mapping.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Soft_Dotted.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Soft_Dotted.bin index 7ad321666..a7d8a41e9 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Soft_Dotted.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Soft_Dotted.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Standardized_Variant.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Standardized_Variant.bin index 0d277933d..93c0ea22a 100644 Binary files 
a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Standardized_Variant.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Standardized_Variant.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Terminal_Punctuation.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Terminal_Punctuation.bin index 852dfdb43..ccf6347f3 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Terminal_Punctuation.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Terminal_Punctuation.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Titlecase_Mapping.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Titlecase_Mapping.bin index 1a0853a99..0cfba1145 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Titlecase_Mapping.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Titlecase_Mapping.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Unified_Ideograph.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Unified_Ideograph.bin index a53e2cdfa..07a2e12bd 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Unified_Ideograph.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Unified_Ideograph.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Uppercase.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Uppercase.bin index 0f5f18b47..88d7316ad 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Uppercase.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Uppercase.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Uppercase_Mapping.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Uppercase_Mapping.bin index 9638a4526..ae518e560 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Uppercase_Mapping.bin and 
b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Uppercase_Mapping.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Variation_Selector.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Variation_Selector.bin index 567c69e19..9f869dff3 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Variation_Selector.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Variation_Selector.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Vertical_Orientation.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Vertical_Orientation.bin index 741ce21e2..885c1590e 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Vertical_Orientation.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Vertical_Orientation.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/White_Space.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/White_Space.bin index 08c28bf8c..c7d8de06d 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/White_Space.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/White_Space.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Word_Break.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Word_Break.bin index 56fc7d81b..26d180a6b 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Word_Break.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Word_Break.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/XID_Continue.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/XID_Continue.bin index 95bf00524..6f490e083 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/XID_Continue.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/XID_Continue.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/XID_Start.bin 
b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/XID_Start.bin index c25fceb6d..c947c5559 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/XID_Start.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/XID_Start.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kAccountingNumeric.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kAccountingNumeric.bin index ca69277fd..b0befdbe8 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kAccountingNumeric.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kAccountingNumeric.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kOtherNumeric.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kOtherNumeric.bin index 7ee2a9bb6..5a64aba5d 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kOtherNumeric.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kOtherNumeric.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kPrimaryNumeric.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kPrimaryNumeric.bin index ec00ed8f5..f001b53d4 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kPrimaryNumeric.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kPrimaryNumeric.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kSimplifiedVariant.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kSimplifiedVariant.bin index e02e771dc..650a9a6e9 100644 Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kSimplifiedVariant.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kSimplifiedVariant.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kTraditionalVariant.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kTraditionalVariant.bin index aa0f2d6e5..0b17ca5de 100644 Binary files 
a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kTraditionalVariant.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kTraditionalVariant.bin differ diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/subtagNames.txt b/UnicodeJsps/src/main/resources/org/unicode/jsp/subtagNames.txt index 116f67331..6d0135fad 100644 --- a/UnicodeJsps/src/main/resources/org/unicode/jsp/subtagNames.txt +++ b/UnicodeJsps/src/main/resources/org/unicode/jsp/subtagNames.txt @@ -4773,7 +4773,7 @@ mhz;Mor (Mor Islands) mi;Māori mia;Miami mib;Atatláhuca Mixtec -mic;Mi'kmaq +mic;Mi'kmaw mid;Mandaic mie;Ocotepec Mixtec mif;Mofu-Gudur diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestLanguageid.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestLanguageid.java index 7bd449fa7..aa92fdb08 100644 --- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestLanguageid.java +++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestLanguageid.java @@ -1,21 +1,29 @@ package org.unicode.jsptest; +import static org.junit.jupiter.api.Assertions.assertTrue; + import com.ibm.icu.util.ULocale; import org.junit.jupiter.api.Test; import org.unicode.jsp.LanguageCode; -import org.unicode.unittest.TestFmwkMinusMinus; -public class TestLanguageid extends TestFmwkMinusMinus { +public class TestLanguageid { @Test public void TestParse() { - String results; - results = LanguageCode.validate("pap-CW", new ULocale("en")); - if (!assertTrue("", results.contains("Curaçao"))) { - errln(results); + { + final String results = LanguageCode.validate("pap-CW", new ULocale("en")); + final String expected = "Curaçao"; + assertContains(results, expected); + } + + { + final String results = LanguageCode.validate("$, eng-840, fr-fr", new ULocale("en")); + final String expected = "target='languageid'>fr-FR"; + assertContains(results, expected); } + } - results = LanguageCode.validate("$, eng-840, fr-fr", new ULocale("en")); - assertContains(results, "target='languageid'>fr-FR"); + private void 
assertContains(final String results, final String expected) { + assertTrue(results.contains(expected), () -> results + " did not contain " + expected); } } diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java index 3398bbb8f..e05911654 100644 --- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java +++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java @@ -34,6 +34,7 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; +import org.opentest4j.TestAbortedException; import org.unicode.jsp.CharEncoder; import org.unicode.jsp.Common; import org.unicode.jsp.UnicodeJsp; @@ -380,6 +381,9 @@ public void TestPerMill(final String name, final Charset charset) { CharEncoder encoder; try { encoder = new CharEncoder(charset, false, false); + } catch (UnsupportedOperationException e) { + // skip charsets that aren't supported + throw new TestAbortedException("Skipping charset " + charset.name(), e); } catch (Exception e) { e.printStackTrace(); assumeTrue(e == null, "Caught exception " + e); diff --git a/docs/unicodejsps/gcp-run.md b/docs/unicodejsps/gcp-run.md index c0ee56f51..90eaa8bdc 100644 --- a/docs/unicodejsps/gcp-run.md +++ b/docs/unicodejsps/gcp-run.md @@ -24,7 +24,7 @@ mkdir -p UnicodeJsps/target && tar -cpz --exclude=.git -f UnicodeJsps/target/cld - build it ``` -docker build -t unicode/unicode-jsps . +docker build -t unicode/unicode-jsps UnicodeJsps/ ``` - try it @@ -45,14 +45,14 @@ docker run --rm -p 8080:8080 unicode/unicode-jsps - login to docker ``` -gcloud auth configure-docker \ - us-central1-docker.pkg.dev +gcloud auth configure-docker us-central1-docker.pkg.dev ``` -- build docker image +- build docker image and run it ``` -docker build -t us-central1-docker.pkg.dev/goog-unicode-dev/unicode-jsps/unicode-jsps:latest . 
+docker build -t us-central1-docker.pkg.dev/goog-unicode-dev/unicode-jsps/unicode-jsps:latest UnicodeJsps/ +docker run --rm -p 8080:8080 us-central1-docker.pkg.dev/goog-unicode-dev/unicode-jsps/unicode-jsps:latest ``` - push docker image diff --git a/docs/unicodejsps/index.md b/docs/unicodejsps/index.md index 477f20d08..c3d97f27c 100644 --- a/docs/unicodejsps/index.md +++ b/docs/unicodejsps/index.md @@ -1,11 +1,5 @@ # Building UnicodeJsp -- Note: you can run the latest UnicodeJsp locally with docker using: - -``` -docker run --rm -p 8080:8080 unicode/unicode-jsp -``` - - Note 2: there are some notes on updated processes for using GCP at [gcp-run.md](./gcp-run.md) - at present, automated deployment is TODO. ## Compiling @@ -113,7 +107,26 @@ Look at , and make sure that there aren't any Z-Other props at the bottom (you'll need to update via Adding New Properties if there are). -(:construction: **TODO**: explain how to do a Docker-based build here.) +### Running a Docker-based build + +compile java stuff + +- `mvn -B package -am -pl UnicodeJsps -DskipTests=true` + +”backup” copy of CLDR and UnicodeTools. (`~/src/cldr` is an optional existing CLDR dir to save a few packets) + +- `git clone --reference-if-able ~/src/cldr https://github.com/unicode-org/cldr.git || (cd cldr && git pull)` +- `mkdir -p UnicodeJsps/target && tar -cpz --exclude=.git --exclude=unicodetools/target/ -f UnicodeJsps/target/cldr-unicodetools.tgz ./cldr/ ./unicodetools/` + +Now, finally build. + +- `docker build -t unicode/unicode-jsp:latest UnicodeJsps/` + +… And run. 
Control-C to cancel it, otherwise visit + +``` +docker run --rm -p 8080:8080 unicode/unicode-jsp:latest +``` ## Commit/PR diff --git a/pom.xml b/pom.xml index 8bca1d7f5..3aedfb69c 100644 --- a/pom.xml +++ b/pom.xml @@ -31,7 +31,7 @@ - 0.0.0-SNAPSHOT-66d15bfc1b + 0.0.0-SNAPSHOT-ba1c4f0cb1 @@ -120,7 +120,7 @@ maven-failsafe-plugin ${maven-surefire-plugin-version} - {devucddir / p.relative_to(genucddir)}" for p in to_move])) # noqa: E501 - confirm = bool(sys.argv[-1] == "-y") # enable running this in automation + confirm = bool("-y" in sys.argv[1:]) # enable running this in automation if not confirm: confirm = input("\nProceed [y/N]?").lower() == "y" diff --git a/unicodetools/data/ucd/dev/ArabicShaping.txt b/unicodetools/data/ucd/dev/ArabicShaping.txt index dd8cb333e..0def17a03 100644 --- a/unicodetools/data/ucd/dev/ArabicShaping.txt +++ b/unicodetools/data/ucd/dev/ArabicShaping.txt @@ -828,6 +828,11 @@ A873; PHAGS-PA CANDRABINDU; U; No_Joining_Group 10D22; HANIFI ROHINGYA SAKIN; R; No_Joining_Group 10D23; HANIFI ROHINGYA DOTLESS KINNA YA WITH DOT ABOVE; D; HANIFI ROHINGYA KINNA YA +# Arabic Extended-D Characters +10EC2; DAL WITH VERTICAL 2 DOTS BELOW; R; DAL +10EC3; TAH WITH VERTICAL 2 DOTS BELOW; D; TAH +10EC4; KAF WITH VERTICAL 2 DOTS BELOW; D; KAF + # Sogdian Characters 10F30; SOGDIAN ALEPH; D; No_Joining_Group diff --git a/unicodetools/data/ucd/dev/Blocks.txt b/unicodetools/data/ucd/dev/Blocks.txt index 8fa3eaad0..f213cc086 100644 --- a/unicodetools/data/ucd/dev/Blocks.txt +++ b/unicodetools/data/ucd/dev/Blocks.txt @@ -217,6 +217,7 @@ FFF0..FFFF; Specials 10500..1052F; Elbasan 10530..1056F; Caucasian Albanian 10570..105BF; Vithkuqi +105C0..105FF; Todhri 10600..1077F; Linear A 10780..107BF; Latin Extended-F 10800..1083F; Cypriot Syllabary @@ -239,6 +240,7 @@ FFF0..FFFF; Specials 10C00..10C4F; Old Turkic 10C80..10CFF; Old Hungarian 10D00..10D3F; Hanifi Rohingya +10D40..10D8F; Garay 10E60..10E7F; Rumi Numeral Symbols 10E80..10EBF; Yezidi 10EC0..10EFF; Arabic 
Extended-C @@ -264,6 +266,7 @@ FFF0..FFFF; Specials 11600..1165F; Modi 11660..1167F; Mongolian Supplement 11680..116CF; Takri +116D0..116FF; Myanmar Extended-C 11700..1174F; Ahom 11800..1184F; Dogra 118A0..118FF; Warang Citi @@ -274,6 +277,7 @@ FFF0..FFFF; Specials 11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A 11AC0..11AFF; Pau Cin Hau 11B00..11B5F; Devanagari Extended-A +11BC0..11BFF; Sunuwar 11C00..11C6F; Bhaiksuki 11C70..11CBF; Marchen 11D00..11D5F; Masaram Gondi @@ -308,6 +312,7 @@ FFF0..FFFF; Specials 1B170..1B2FF; Nushu 1BC00..1BC9F; Duployan 1BCA0..1BCAF; Shorthand Format Controls +1CC00..1CEBF; Symbols for Legacy Computing Supplement 1CF00..1CFCF; Znamenny Musical Notation 1D000..1D0FF; Byzantine Musical Symbols 1D100..1D1FF; Musical Symbols diff --git a/unicodetools/data/ucd/dev/CaseFolding.txt b/unicodetools/data/ucd/dev/CaseFolding.txt index 6e17556f4..8d4b90cd3 100644 --- a/unicodetools/data/ucd/dev/CaseFolding.txt +++ b/unicodetools/data/ucd/dev/CaseFolding.txt @@ -1,5 +1,5 @@ # CaseFolding-16.0.0.txt -# Date: 2023-10-03, 12:05:04 GMT +# Date: 2023-10-24, 09:19:17 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use, see https://www.unicode.org/terms_of_use.html @@ -603,6 +603,7 @@ 1C86; C; 044A; # CYRILLIC SMALL LETTER TALL HARD SIGN 1C87; C; 0463; # CYRILLIC SMALL LETTER TALL YAT 1C88; C; A64B; # CYRILLIC SMALL LETTER UNBLENDED UK +1C89; C; 1C8A; # CYRILLIC CAPITAL LETTER TJE 1C90; C; 10D0; # GEORGIAN MTAVRULI CAPITAL LETTER AN 1C91; C; 10D1; # GEORGIAN MTAVRULI CAPITAL LETTER BAN 1C92; C; 10D2; # GEORGIAN MTAVRULI CAPITAL LETTER GAN @@ -1526,6 +1527,28 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z 10CB0; C; 10CF0; # OLD HUNGARIAN CAPITAL LETTER EZS 10CB1; C; 10CF1; # OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN 10CB2; C; 10CF2; # OLD HUNGARIAN CAPITAL LETTER US +10D50; C; 10D70; # GARAY CAPITAL LETTER A +10D51; C; 10D71; # GARAY CAPITAL LETTER CA +10D52; C; 10D72; # GARAY CAPITAL LETTER MA +10D53; C; 10D73; # GARAY CAPITAL LETTER KA +10D54; C; 10D74; # GARAY CAPITAL LETTER BA +10D55; C; 10D75; # GARAY CAPITAL LETTER JA +10D56; C; 10D76; # GARAY CAPITAL LETTER SA +10D57; C; 10D77; # GARAY CAPITAL LETTER WA +10D58; C; 10D78; # GARAY CAPITAL LETTER LA +10D59; C; 10D79; # GARAY CAPITAL LETTER GA +10D5A; C; 10D7A; # GARAY CAPITAL LETTER DA +10D5B; C; 10D7B; # GARAY CAPITAL LETTER XA +10D5C; C; 10D7C; # GARAY CAPITAL LETTER YA +10D5D; C; 10D7D; # GARAY CAPITAL LETTER TA +10D5E; C; 10D7E; # GARAY CAPITAL LETTER RA +10D5F; C; 10D7F; # GARAY CAPITAL LETTER NYA +10D60; C; 10D80; # GARAY CAPITAL LETTER FA +10D61; C; 10D81; # GARAY CAPITAL LETTER NA +10D62; C; 10D82; # GARAY CAPITAL LETTER PA +10D63; C; 10D83; # GARAY CAPITAL LETTER HA +10D64; C; 10D84; # GARAY CAPITAL LETTER OLD KA +10D65; C; 10D85; # GARAY CAPITAL LETTER OLD NA 118A0; C; 118C0; # WARANG CITI CAPITAL LETTER NGAA 118A1; C; 118C1; # WARANG CITI CAPITAL LETTER A 118A2; C; 118C2; # WARANG CITI CAPITAL LETTER WI diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index f9ef71678..96c344fa4 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ 
b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-16.0.0.txt -# Date: 2023-10-03, 12:05:05 GMT +# Date: 2023-10-24, 09:19:18 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2009,8 +2009,23 @@ FDFE..FDFF ; 14.0 # [2] ARABIC LIGATURE SUBHAANAHU WA TAAALAA..ARABIC LIGAT # Newly assigned in Unicode 16.0.0 (September, 2024) +0897 ; 16.0 # ARABIC PEPET +0C5C ; 16.0 # TELUGU ARCHAIC SHRII +0CDC ; 16.0 # KANNADA ARCHAIC SHRII +1C89..1C8A ; 16.0 # [2] CYRILLIC CAPITAL LETTER TJE..CYRILLIC SMALL LETTER TJE A7CB ; 16.0 # LATIN CAPITAL LETTER RAMS HORN - -# Total code points: 1 +105C0..105F3 ; 16.0 # [52] TODHRI LETTER A..TODHRI LETTER OO +10D40..10D65 ; 16.0 # [38] GARAY DIGIT ZERO..GARAY CAPITAL LETTER OLD NA +10D69..10D85 ; 16.0 # [29] GARAY VOWEL SIGN E..GARAY SMALL LETTER OLD NA +10D8E..10D8F ; 16.0 # [2] GARAY PLUS SIGN..GARAY MINUS SIGN +10EC2..10EC4 ; 16.0 # [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EFC ; 16.0 # ARABIC COMBINING ALEF OVERLAY +116D0..116E3 ; 16.0 # [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE +11BC0..11BE1 ; 16.0 # [34] SUNUWAR LETTER DEVI..SUNUWAR SIGN PVO +11BF0..11BF9 ; 16.0 # [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE +1CEB0..1CEB3 ; 16.0 # [4] HORIZONTAL ZIGZAG LINE..BLACK RIGHT TRIANGLE CARET +1F8B2 ; 16.0 # RIGHTWARDS ARROW WITH LOWER HOOK + +# Total code points: 200 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 2890a1237..651bed30b 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2023-10-03, 12:05:25 GMT +# Date: 2023-10-24, 09:19:36 GMT # © 2023 Unicode®, Inc. 
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -177,6 +177,7 @@ FF5C ; Math # Sm FULLWIDTH VERTICAL LINE FF5E ; Math # Sm FULLWIDTH TILDE FFE2 ; Math # Sm FULLWIDTH NOT SIGN FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW +10D8E..10D8F ; Math # Sm [2] GARAY PLUS SIGN..GARAY MINUS SIGN 1D400..1D454 ; Math # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G 1D456..1D49C ; Math # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A 1D49E..1D49F ; Math # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D @@ -253,7 +254,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 1EEAB..1EEBB ; Math # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1EEF0..1EEF1 ; Math # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -# Total code points: 2310 +# Total code points: 2312 # ================================================ @@ -343,6 +344,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 0860..086A ; Alphabetic # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; Alphabetic # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0889..088E ; Alphabetic # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0897 ; Alphabetic # Mn ARABIC PEPET 08A0..08C8 ; Alphabetic # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; Alphabetic # Lm ARABIC SMALL FARSI YEH 08D4..08DF ; Alphabetic # Mn [12] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH WORD WAQFA @@ -474,7 +476,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 0C4A..0C4C ; Alphabetic # Mn [3] TELUGU VOWEL SIGN O..TELUGU VOWEL SIGN AU 0C55..0C56 ; Alphabetic # 
Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; Alphabetic # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; Alphabetic # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; Alphabetic # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; Alphabetic # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; Alphabetic # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C80 ; Alphabetic # Lo KANNADA SIGN SPACING CANDRABINDU @@ -494,7 +496,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 0CCA..0CCB ; Alphabetic # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC ; Alphabetic # Mn KANNADA VOWEL SIGN AU 0CD5..0CD6 ; Alphabetic # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; Alphabetic # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; Alphabetic # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; Alphabetic # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; Alphabetic # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CF1..0CF2 ; Alphabetic # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA @@ -710,7 +712,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 1C4D..1C4F ; Alphabetic # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C5A..1C77 ; Alphabetic # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; Alphabetic # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD -1C80..1C88 ; Alphabetic # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C8A ; Alphabetic # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE 1C90..1CBA ; Alphabetic # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; Alphabetic # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1CE9..1CEC ; Alphabetic # Lo [4] VEDIC SIGN ANUSVARA 
ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL @@ -998,6 +1000,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 105A3..105B1 ; Alphabetic # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE 105B3..105B9 ; Alphabetic # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; Alphabetic # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; Alphabetic # Lo [52] TODHRI LETTER A..TODHRI LETTER OO 10600..10736 ; Alphabetic # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; Alphabetic # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; Alphabetic # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 @@ -1038,9 +1041,18 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 10CC0..10CF2 ; Alphabetic # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US 10D00..10D23 ; Alphabetic # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA 10D24..10D27 ; Alphabetic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D4A..10D4D ; Alphabetic # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; Alphabetic # Lm GARAY VOWEL LENGTH MARK +10D4F ; Alphabetic # Lo GARAY SUKUN +10D50..10D65 ; Alphabetic # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D69 ; Alphabetic # Mn GARAY VOWEL SIGN E +10D6F ; Alphabetic # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; Alphabetic # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 10E80..10EA9 ; Alphabetic # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EAB..10EAC ; Alphabetic # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; Alphabetic # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EC2..10EC4 ; Alphabetic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EFC ; Alphabetic # Mn ARABIC COMBINING ALEF OVERLAY 
10F00..10F1C ; Alphabetic # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; Alphabetic # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; Alphabetic # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -1211,6 +1223,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 11A97 ; Alphabetic # Mc SOYOMBO SIGN VISARGA 11A9D ; Alphabetic # Lo SOYOMBO MARK PLUTA 11AB0..11AF8 ; Alphabetic # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11BC0..11BE0 ; Alphabetic # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11C00..11C08 ; Alphabetic # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E ; Alphabetic # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA 11C2F ; Alphabetic # Mc BHAIKSUKI VOWEL SIGN AA @@ -1402,7 +1415,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 138388 +# Total code points: 138534 # ================================================ @@ -1691,6 +1704,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 10FD..10FF ; Lowercase # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN 13F8..13FD ; Lowercase # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV 1C80..1C88 ; Lowercase # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C8A ; Lowercase # L& CYRILLIC SMALL LETTER TJE 1D00..1D2B ; Lowercase # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL 1D2C..1D6A ; Lowercase # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI 1D6B..1D77 ; Lowercase # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G @@ -2060,6 +2074,7 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH 
L 10787..107B0 ; Lowercase # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK 107B2..107BA ; Lowercase # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL 10CC0..10CF2 ; Lowercase # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D70..10D85 ; Lowercase # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118C0..118DF ; Lowercase # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E60..16E7F ; Lowercase # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1D41A..1D433 ; Lowercase # L& [26] MATHEMATICAL BOLD SMALL A..MATHEMATICAL BOLD SMALL Z @@ -2096,7 +2111,7 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1E030..1E06D ; Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lowercase # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2544 +# Total code points: 2567 # ================================================ @@ -2379,6 +2394,7 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 10C7 ; Uppercase # L& GEORGIAN CAPITAL LETTER YN 10CD ; Uppercase # L& GEORGIAN CAPITAL LETTER AEN 13A0..13F5 ; Uppercase # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +1C89 ; Uppercase # L& CYRILLIC CAPITAL LETTER TJE 1C90..1CBA ; Uppercase # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; Uppercase # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1E00 ; Uppercase # L& LATIN CAPITAL LETTER A WITH RING BELOW @@ -2718,6 +2734,7 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 1058C..10592 ; Uppercase # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE 10594..10595 ; Uppercase # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE 10C80..10CB2 ; Uppercase # L& [51] OLD 
HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10D50..10D65 ; Uppercase # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA 118A0..118BF ; Uppercase # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO 16E40..16E5F ; Uppercase # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y 1D400..1D419 ; Uppercase # L& [26] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL BOLD CAPITAL Z @@ -2756,7 +2773,7 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 1F150..1F169 ; Uppercase # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Uppercase # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1952 +# Total code points: 1975 # ================================================ @@ -2801,7 +2818,7 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 10FD..10FF ; Cased # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN 13A0..13F5 ; Cased # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV 13F8..13FD ; Cased # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV -1C80..1C88 ; Cased # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C8A ; Cased # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE 1C90..1CBA ; Cased # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; Cased # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1D00..1D2B ; Cased # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL @@ -2898,6 +2915,8 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 107B2..107BA ; Cased # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL 10C80..10CB2 ; Cased # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US 10CC0..10CF2 ; Cased # 
L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D50..10D65 ; Cased # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D70..10D85 ; Cased # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118A0..118DF ; Cased # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E40..16E7F ; Cased # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1D400..1D454 ; Cased # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G @@ -2939,7 +2958,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1F150..1F169 ; Cased # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Cased # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 4527 +# Total code points: 4573 # ================================================ @@ -3016,7 +3035,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 0859..085B ; Case_Ignorable # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK 0888 ; Case_Ignorable # Sk ARABIC RAISED ROUND DOT 0890..0891 ; Case_Ignorable # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE -0898..089F ; Case_Ignorable # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +0897..089F ; Case_Ignorable # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08C9 ; Case_Ignorable # Lm ARABIC SMALL FARSI YEH 08CA..08E1 ; Case_Ignorable # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA 08E2 ; Case_Ignorable # Cf ARABIC DISPUTED END OF AYAH @@ -3297,8 +3316,11 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI 10A3F ; Case_Ignorable # Mn KHAROSHTHI VIRAMA 10AE5..10AE6 ; Case_Ignorable # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 10D24..10D27 ; Case_Ignorable # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D4E ; 
Case_Ignorable # Lm GARAY VOWEL LENGTH MARK +10D69..10D6D ; Case_Ignorable # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK +10D6F ; Case_Ignorable # Lm GARAY REDUPLICATION MARK 10EAB..10EAC ; Case_Ignorable # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFD..10EFF ; Case_Ignorable # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10EFC..10EFF ; Case_Ignorable # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Case_Ignorable # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Case_Ignorable # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; Case_Ignorable # Mn BRAHMI SIGN ANUSVARA @@ -3441,7 +3463,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2707 +# Total code points: 2716 # ================================================ @@ -3725,6 +3747,7 @@ E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELEC 10C7 ; Changes_When_Lowercased # L& GEORGIAN CAPITAL LETTER YN 10CD ; Changes_When_Lowercased # L& GEORGIAN CAPITAL LETTER AEN 13A0..13F5 ; Changes_When_Lowercased # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +1C89 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER TJE 1C90..1CBA ; Changes_When_Lowercased # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; Changes_When_Lowercased # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1E00 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH RING BELOW @@ -4057,11 +4080,12 @@ FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTE 1058C..10592 ; Changes_When_Lowercased # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE 10594..10595 ; 
Changes_When_Lowercased # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE 10C80..10CB2 ; Changes_When_Lowercased # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10D50..10D65 ; Changes_When_Lowercased # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA 118A0..118BF ; Changes_When_Lowercased # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO 16E40..16E5F ; Changes_When_Lowercased # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y 1E900..1E921 ; Changes_When_Lowercased # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1434 +# Total code points: 1457 # ================================================ @@ -4358,6 +4382,7 @@ FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTE 10FD..10FF ; Changes_When_Uppercased # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN 13F8..13FD ; Changes_When_Uppercased # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV 1C80..1C88 ; Changes_When_Uppercased # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C8A ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER TJE 1D79 ; Changes_When_Uppercased # L& LATIN SMALL LETTER INSULAR G 1D7D ; Changes_When_Uppercased # L& LATIN SMALL LETTER P WITH STROKE 1D8E ; Changes_When_Uppercased # L& LATIN SMALL LETTER Z WITH PALATAL HOOK @@ -4693,11 +4718,12 @@ FF41..FF5A ; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER 105B3..105B9 ; Changes_When_Uppercased # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; Changes_When_Uppercased # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE 10CC0..10CF2 ; Changes_When_Uppercased # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D70..10D85 ; Changes_When_Uppercased # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118C0..118DF ; Changes_When_Uppercased # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG 
CITI SMALL LETTER VIYO 16E60..16E7F ; Changes_When_Uppercased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1E922..1E943 ; Changes_When_Uppercased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 1526 +# Total code points: 1549 # ================================================ @@ -4993,6 +5019,7 @@ FF41..FF5A ; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER 0561..0587 ; Changes_When_Titlecased # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN 13F8..13FD ; Changes_When_Titlecased # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV 1C80..1C88 ; Changes_When_Titlecased # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C8A ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER TJE 1D79 ; Changes_When_Titlecased # L& LATIN SMALL LETTER INSULAR G 1D7D ; Changes_When_Titlecased # L& LATIN SMALL LETTER P WITH STROKE 1D8E ; Changes_When_Titlecased # L& LATIN SMALL LETTER Z WITH PALATAL HOOK @@ -5328,11 +5355,12 @@ FF41..FF5A ; Changes_When_Titlecased # L& [26] FULLWIDTH LATIN SMALL LETTER 105B3..105B9 ; Changes_When_Titlecased # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; Changes_When_Titlecased # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE 10CC0..10CF2 ; Changes_When_Titlecased # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D70..10D85 ; Changes_When_Titlecased # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118C0..118DF ; Changes_When_Titlecased # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E60..16E7F ; Changes_When_Titlecased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1E922..1E943 ; Changes_When_Titlecased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 1453 +# Total code points: 1476 # ================================================ @@ -5623,7 +5651,7 @@ FF41..FF5A ; Changes_When_Titlecased # 
L& [26] FULLWIDTH LATIN SMALL LETTER 10C7 ; Changes_When_Casefolded # L& GEORGIAN CAPITAL LETTER YN 10CD ; Changes_When_Casefolded # L& GEORGIAN CAPITAL LETTER AEN 13F8..13FD ; Changes_When_Casefolded # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV -1C80..1C88 ; Changes_When_Casefolded # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C89 ; Changes_When_Casefolded # L& [10] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC CAPITAL LETTER TJE 1C90..1CBA ; Changes_When_Casefolded # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; Changes_When_Casefolded # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1E00 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH RING BELOW @@ -5961,11 +5989,12 @@ FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTE 1058C..10592 ; Changes_When_Casefolded # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE 10594..10595 ; Changes_When_Casefolded # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE 10C80..10CB2 ; Changes_When_Casefolded # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10D50..10D65 ; Changes_When_Casefolded # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA 118A0..118BF ; Changes_When_Casefolded # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO 16E40..16E5F ; Changes_When_Casefolded # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y 1E900..1E921 ; Changes_When_Casefolded # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1507 +# Total code points: 1530 # ================================================ @@ -6027,7 +6056,7 @@ FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTE 10FD..10FF ; Changes_When_Casemapped # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN 13A0..13F5 ; Changes_When_Casemapped # L& 
[86] CHEROKEE LETTER A..CHEROKEE LETTER MV 13F8..13FD ; Changes_When_Casemapped # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV -1C80..1C88 ; Changes_When_Casemapped # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C8A ; Changes_When_Casemapped # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE 1C90..1CBA ; Changes_When_Casemapped # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; Changes_When_Casemapped # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1D79 ; Changes_When_Casemapped # L& LATIN SMALL LETTER INSULAR G @@ -6101,11 +6130,13 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 105BB..105BC ; Changes_When_Casemapped # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE 10C80..10CB2 ; Changes_When_Casemapped # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US 10CC0..10CF2 ; Changes_When_Casemapped # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D50..10D65 ; Changes_When_Casemapped # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D70..10D85 ; Changes_When_Casemapped # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118A0..118DF ; Changes_When_Casemapped # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E40..16E7F ; Changes_When_Casemapped # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1E900..1E943 ; Changes_When_Casemapped # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2929 +# Total code points: 2975 # ================================================ @@ -6247,7 +6278,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 0C2A..0C39 ; ID_Start # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; ID_Start # Lo TELUGU SIGN AVAGRAHA 0C58..0C5A ; ID_Start # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA 
-0C5D ; ID_Start # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; ID_Start # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; ID_Start # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C80 ; ID_Start # Lo KANNADA SIGN SPACING CANDRABINDU 0C85..0C8C ; ID_Start # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L @@ -6256,7 +6287,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 0CAA..0CB3 ; ID_Start # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; ID_Start # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; ID_Start # Lo KANNADA SIGN AVAGRAHA -0CDD..0CDE ; ID_Start # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; ID_Start # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; ID_Start # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CF1..0CF2 ; ID_Start # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D04..0D0C ; ID_Start # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -6364,7 +6395,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER 1C4D..1C4F ; ID_Start # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C5A..1C77 ; ID_Start # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; ID_Start # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD -1C80..1C88 ; ID_Start # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C8A ; ID_Start # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE 1C90..1CBA ; ID_Start # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; ID_Start # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1CE9..1CEC ; ID_Start # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL @@ -6603,6 +6634,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 105A3..105B1 ; ID_Start # L& [15] VITHKUQI 
SMALL LETTER HA..VITHKUQI SMALL LETTER RE 105B3..105B9 ; ID_Start # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; ID_Start # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; ID_Start # Lo [52] TODHRI LETTER A..TODHRI LETTER OO 10600..10736 ; ID_Start # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; ID_Start # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; ID_Start # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 @@ -6639,8 +6671,15 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10C80..10CB2 ; ID_Start # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US 10CC0..10CF2 ; ID_Start # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US 10D00..10D23 ; ID_Start # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D4A..10D4D ; ID_Start # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; ID_Start # Lm GARAY VOWEL LENGTH MARK +10D4F ; ID_Start # Lo GARAY SUKUN +10D50..10D65 ; ID_Start # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D6F ; ID_Start # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; ID_Start # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 10E80..10EA9 ; ID_Start # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; ID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EC2..10EC4 ; ID_Start # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10F00..10F1C ; ID_Start # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; ID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -6713,6 +6752,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 11A5C..11A89 ; ID_Start # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA 
11A9D ; ID_Start # Lo SOYOMBO MARK PLUTA 11AB0..11AF8 ; ID_Start # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11BC0..11BE0 ; ID_Start # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11C00..11C08 ; ID_Start # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E ; ID_Start # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA 11C40 ; ID_Start # Lo BHAIKSUKI SIGN AVAGRAHA @@ -6859,7 +6899,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136968 +# Total code points: 137111 # ================================================ @@ -6966,7 +7006,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 0860..086A ; ID_Continue # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; ID_Continue # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0889..088E ; ID_Continue # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL -0898..089F ; ID_Continue # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +0897..089F ; ID_Continue # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; ID_Continue # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; ID_Continue # Lm ARABIC SMALL FARSI YEH 08CA..08E1 ; ID_Continue # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA @@ -7115,7 +7155,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 0C4A..0C4D ; ID_Continue # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; ID_Continue # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; ID_Continue # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; ID_Continue # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; ID_Continue 
# Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; ID_Continue # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; ID_Continue # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; ID_Continue # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -7137,7 +7177,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 0CCA..0CCB ; ID_Continue # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; ID_Continue # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; ID_Continue # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; ID_Continue # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; ID_Continue # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; ID_Continue # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; ID_Continue # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; ID_Continue # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE @@ -7399,7 +7439,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1C50..1C59 ; ID_Continue # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE 1C5A..1C77 ; ID_Continue # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; ID_Continue # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD -1C80..1C88 ; ID_Continue # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C8A ; ID_Continue # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE 1C90..1CBA ; ID_Continue # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; ID_Continue # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1CD0..1CD2 ; ID_Continue # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA @@ -7735,6 +7775,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 105A3..105B1 ; ID_Continue # L& 
[15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE 105B3..105B9 ; ID_Continue # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; ID_Continue # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; ID_Continue # Lo [52] TODHRI LETTER A..TODHRI LETTER OO 10600..10736 ; ID_Continue # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; ID_Continue # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; ID_Continue # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 @@ -7779,10 +7820,19 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 10D00..10D23 ; ID_Continue # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA 10D24..10D27 ; ID_Continue # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D30..10D39 ; ID_Continue # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10D40..10D49 ; ID_Continue # Nd [10] GARAY DIGIT ZERO..GARAY DIGIT NINE +10D4A..10D4D ; ID_Continue # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; ID_Continue # Lm GARAY VOWEL LENGTH MARK +10D4F ; ID_Continue # Lo GARAY SUKUN +10D50..10D65 ; ID_Continue # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D69..10D6D ; ID_Continue # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK +10D6F ; ID_Continue # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; ID_Continue # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 10E80..10EA9 ; ID_Continue # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EAB..10EAC ; ID_Continue # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; ID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE -10EFD..10EFF ; ID_Continue # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10EC2..10EC4 ; ID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EFC..10EFF ; 
ID_Continue # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; ID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; ID_Continue # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -7929,6 +7979,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 116B7 ; ID_Continue # Mn TAKRI SIGN NUKTA 116B8 ; ID_Continue # Lo TAKRI LETTER ARCHAIC KHA 116C0..116C9 ; ID_Continue # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +116D0..116E3 ; ID_Continue # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11700..1171A ; ID_Continue # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA 1171D..1171F ; ID_Continue # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; ID_Continue # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA @@ -7988,6 +8039,8 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 11A98..11A99 ; ID_Continue # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER 11A9D ; ID_Continue # Lo SOYOMBO MARK PLUTA 11AB0..11AF8 ; ID_Continue # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11BC0..11BE0 ; ID_Continue # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO +11BF0..11BF9 ; ID_Continue # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 11C00..11C08 ; ID_Continue # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E ; ID_Continue # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA 11C2F ; ID_Continue # Mc BHAIKSUKI VOWEL SIGN AA @@ -8218,7 +8271,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 140109 +# Total code points: 140299 # 
================================================ @@ -8357,7 +8410,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 0C2A..0C39 ; XID_Start # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; XID_Start # Lo TELUGU SIGN AVAGRAHA 0C58..0C5A ; XID_Start # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; XID_Start # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; XID_Start # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; XID_Start # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C80 ; XID_Start # Lo KANNADA SIGN SPACING CANDRABINDU 0C85..0C8C ; XID_Start # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L @@ -8366,7 +8419,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 0CAA..0CB3 ; XID_Start # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; XID_Start # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; XID_Start # Lo KANNADA SIGN AVAGRAHA -0CDD..0CDE ; XID_Start # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; XID_Start # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; XID_Start # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CF1..0CF2 ; XID_Start # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D04..0D0C ; XID_Start # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -8474,7 +8527,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR 1C4D..1C4F ; XID_Start # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C5A..1C77 ; XID_Start # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; XID_Start # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD -1C80..1C88 ; XID_Start # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C8A ; XID_Start # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE 1C90..1CBA ; XID_Start # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 
1CBD..1CBF ; XID_Start # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1CE9..1CEC ; XID_Start # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL @@ -8717,6 +8770,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 105A3..105B1 ; XID_Start # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE 105B3..105B9 ; XID_Start # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; XID_Start # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; XID_Start # Lo [52] TODHRI LETTER A..TODHRI LETTER OO 10600..10736 ; XID_Start # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; XID_Start # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; XID_Start # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 @@ -8753,8 +8807,15 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 10C80..10CB2 ; XID_Start # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US 10CC0..10CF2 ; XID_Start # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US 10D00..10D23 ; XID_Start # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D4A..10D4D ; XID_Start # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; XID_Start # Lm GARAY VOWEL LENGTH MARK +10D4F ; XID_Start # Lo GARAY SUKUN +10D50..10D65 ; XID_Start # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D6F ; XID_Start # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; XID_Start # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 10E80..10EA9 ; XID_Start # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; XID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EC2..10EC4 ; XID_Start # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10F00..10F1C ; XID_Start # Lo [29] OLD SOGDIAN 
LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; XID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; XID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -8827,6 +8888,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 11A5C..11A89 ; XID_Start # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA 11A9D ; XID_Start # Lo SOYOMBO MARK PLUTA 11AB0..11AF8 ; XID_Start # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11BC0..11BE0 ; XID_Start # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11C00..11C08 ; XID_Start # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E ; XID_Start # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA 11C40 ; XID_Start # Lo BHAIKSUKI SIGN AVAGRAHA @@ -8973,7 +9035,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 136945 +# Total code points: 137088 # ================================================ @@ -9076,7 +9138,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 0860..086A ; XID_Continue # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA 0870..0887 ; XID_Continue # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT 0889..088E ; XID_Continue # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL -0898..089F ; XID_Continue # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +0897..089F ; XID_Continue # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; XID_Continue # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; XID_Continue # Lm ARABIC SMALL FARSI YEH 08CA..08E1 ; XID_Continue # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN 
SAFHA @@ -9225,7 +9287,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 0C4A..0C4D ; XID_Continue # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; XID_Continue # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; XID_Continue # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; XID_Continue # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; XID_Continue # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; XID_Continue # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; XID_Continue # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; XID_Continue # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -9247,7 +9309,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 0CCA..0CCB ; XID_Continue # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; XID_Continue # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; XID_Continue # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; XID_Continue # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; XID_Continue # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; XID_Continue # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; XID_Continue # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; XID_Continue # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE @@ -9509,7 +9571,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1C50..1C59 ; XID_Continue # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE 1C5A..1C77 ; XID_Continue # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; XID_Continue # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD -1C80..1C88 ; XID_Continue # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C8A ; XID_Continue # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE 
1C90..1CBA ; XID_Continue # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; XID_Continue # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1CD0..1CD2 ; XID_Continue # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA @@ -9850,6 +9912,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 105A3..105B1 ; XID_Continue # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE 105B3..105B9 ; XID_Continue # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; XID_Continue # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; XID_Continue # Lo [52] TODHRI LETTER A..TODHRI LETTER OO 10600..10736 ; XID_Continue # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; XID_Continue # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; XID_Continue # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 @@ -9894,10 +9957,19 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 10D00..10D23 ; XID_Continue # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA 10D24..10D27 ; XID_Continue # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D30..10D39 ; XID_Continue # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10D40..10D49 ; XID_Continue # Nd [10] GARAY DIGIT ZERO..GARAY DIGIT NINE +10D4A..10D4D ; XID_Continue # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; XID_Continue # Lm GARAY VOWEL LENGTH MARK +10D4F ; XID_Continue # Lo GARAY SUKUN +10D50..10D65 ; XID_Continue # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D69..10D6D ; XID_Continue # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK +10D6F ; XID_Continue # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; XID_Continue # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 10E80..10EA9 ; XID_Continue # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 
10EAB..10EAC ; XID_Continue # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EB0..10EB1 ; XID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE -10EFD..10EFF ; XID_Continue # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10EC2..10EC4 ; XID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EFC..10EFF ; XID_Continue # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; XID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; XID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; XID_Continue # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -10044,6 +10116,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 116B7 ; XID_Continue # Mn TAKRI SIGN NUKTA 116B8 ; XID_Continue # Lo TAKRI LETTER ARCHAIC KHA 116C0..116C9 ; XID_Continue # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +116D0..116E3 ; XID_Continue # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11700..1171A ; XID_Continue # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA 1171D..1171F ; XID_Continue # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; XID_Continue # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA @@ -10103,6 +10176,8 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 11A98..11A99 ; XID_Continue # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER 11A9D ; XID_Continue # Lo SOYOMBO MARK PLUTA 11AB0..11AF8 ; XID_Continue # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11BC0..11BE0 ; XID_Continue # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO +11BF0..11BF9 ; XID_Continue # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 11C00..11C08 ; XID_Continue # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E 
; XID_Continue # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA 11C2F ; XID_Continue # Mc BHAIKSUKI VOWEL SIGN AA @@ -10333,7 +10408,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 140090 +# Total code points: 140280 # ================================================ @@ -10418,7 +10493,7 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] ........ -# Total code points: 10492 +# Total code points: 10515 # ================================================ @@ -9653,6 +9684,7 @@ E01F0..E0FFF ; NFKC_CF; # Cn [3600] ........ -# Total code points: 10454 +# Total code points: 10477 # ================================================ @@ -15413,7 +15467,7 @@ E01F0..E0FFF ; NFKC_SCF; # Cn [3600] ........ -# Total code points: 10492 +# Total code points: 10515 # EOF diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index f9fa02ceb..d0bc8d7c5 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-16.0.0.txt -# Date: 2023-10-03, 12:05:30 GMT +# Date: 2023-10-24, 09:19:42 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use, see https://www.unicode.org/terms_of_use.html @@ -334,7 +334,7 @@ 0888 ; N # Sk ARABIC RAISED ROUND DOT 0889..088E ; N # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL 0890..0891 ; N # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE -0898..089F ; N # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +0897..089F ; N # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; N # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; N # Lm ARABIC SMALL FARSI YEH 08CA..08E1 ; N # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA @@ -502,7 +502,7 @@ 0C4A..0C4D ; N # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; N # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; N # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; N # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; N # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; N # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; N # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; N # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -528,7 +528,7 @@ 0CCA..0CCB ; N # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; N # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; N # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; N # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; N # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; N # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; N # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; N # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE @@ -859,7 +859,7 @@ 1C5A..1C77 ; N # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; N # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1C7E..1C7F ; N # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION 
DOUBLE MUCAAD -1C80..1C88 ; N # Ll [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C8A ; N # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE 1C90..1CBA ; N # Lu [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; N # Lu [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1CC0..1CC7 ; N # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA @@ -1870,6 +1870,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 105A3..105B1 ; N # Ll [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE 105B3..105B9 ; N # Ll [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; N # Ll [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; N # Lo [52] TODHRI LETTER A..TODHRI LETTER OO 10600..10736 ; N # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; N # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; N # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 @@ -1942,12 +1943,23 @@ FFFD ; A # So REPLACEMENT CHARACTER 10D00..10D23 ; N # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA 10D24..10D27 ; N # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D30..10D39 ; N # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10D40..10D49 ; N # Nd [10] GARAY DIGIT ZERO..GARAY DIGIT NINE +10D4A..10D4D ; N # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; N # Lm GARAY VOWEL LENGTH MARK +10D4F ; N # Lo GARAY SUKUN +10D50..10D65 ; N # Lu [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D69..10D6D ; N # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK +10D6E ; N # Pd GARAY HYPHEN +10D6F ; N # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; N # Ll [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA +10D8E..10D8F ; N # Sm [2] GARAY PLUS SIGN..GARAY MINUS SIGN 10E60..10E7E ; N # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS 
10E80..10EA9 ; N # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EAB..10EAC ; N # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EAD ; N # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE -10EFD..10EFF ; N # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10EC2..10EC4 ; N # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EFC..10EFF ; N # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; N # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; N # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -2123,6 +2135,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 116B8 ; N # Lo TAKRI LETTER ARCHAIC KHA 116B9 ; N # Po TAKRI ABBREVIATION SIGN 116C0..116C9 ; N # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +116D0..116E3 ; N # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11700..1171A ; N # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA 1171D..1171F ; N # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; N # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA @@ -2195,6 +2208,9 @@ FFFD ; A # So REPLACEMENT CHARACTER 11AB0..11ABF ; N # Lo [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA 11AC0..11AF8 ; N # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL 11B00..11B09 ; N # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11BC0..11BE0 ; N # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO +11BE1 ; N # Po SUNUWAR SIGN PVO +11BF0..11BF9 ; N # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 11C00..11C08 ; N # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E ; N # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA 11C2F ; N # Mc BHAIKSUKI VOWEL SIGN AA @@ -2327,6 +2343,7 @@ FFFD ; A 
# So REPLACEMENT CHARACTER 1BC9D..1BC9E ; N # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK 1BC9F ; N # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1BCA0..1BCA3 ; N # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP +1CEB0..1CEB3 ; N # So [4] HORIZONTAL ZIGZAG LINE..BLACK RIGHT TRIANGLE CARET 1CF00..1CF2D ; N # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF30..1CF46 ; N # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG 1CF50..1CFC3 ; N # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK @@ -2574,7 +2591,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 1F850..1F859 ; N # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW 1F860..1F887 ; N # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW 1F890..1F8AD ; N # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS -1F8B0..1F8B1 ; N # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST +1F8B0..1F8B2 ; N # So [3] ARROW POINTING UPWARDS THEN NORTH WEST..RIGHTWARDS ARROW WITH LOWER HOOK 1F900..1F90B ; N # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT 1F90C..1F93A ; W # So [47] PINCHED FINGERS..FENCER 1F93B ; N # So MODERN PENTATHLON diff --git a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt index a7c5aef60..9b5aabfa0 100644 --- a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt +++ b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt @@ -1,11 +1,11 @@ -# IndicPositionalCategory-15.1.0.txt -# Date: 2023-01-05 +# IndicPositionalCategory-16.0.0.txt +# Date: 2023-10-02, 22:58:33 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use, see https://www.unicode.org/terms_of_use.html # -# For documentation, see UAX #44: Unicode Character Database, -# at https://www.unicode.org/reports/tr44/ +# Unicode Character Database +# For documentation, see https://www.unicode.org/reports/tr44/ # # This file defines the following property: # diff --git a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt index f2623b471..5de0d7554 100644 --- a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt +++ b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt @@ -1,11 +1,11 @@ -# IndicSyllabicCategory-15.1.0.txt -# Date: 2023-01-05 +# IndicSyllabicCategory-16.0.0.txt +# Date: 2023-10-02, 22:58:33 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html # -# For documentation, see UAX #44: Unicode Character Database, -# at https://www.unicode.org/reports/tr44/ +# Unicode Character Database +# For documentation, see https://www.unicode.org/reports/tr44/ # # This file defines the following property: # @@ -1335,7 +1335,7 @@ ABF0..ABF9 ; Number # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NI # script, e.g. in Brahmi) # # Note: These are different from Numbers, in the way that there is no known -# evidence of Brahmi Joining Numbers taking vowels or subjoined consonants. +# evidence of Brahmi Joining Numbers taking vowels or subjoined consonants. # Until such evidence is found, implementations may assume that Brahmi # Joining Numbers only participate in shaping with other Brahmi Joining # Numbers. 
diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 0928bdca1..a1d3641ed 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-16.0.0.txt -# Date: 2023-10-03, 12:05:31 GMT +# Date: 2023-10-24, 09:19:43 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -280,7 +280,7 @@ 0888 ; AL # Sk ARABIC RAISED ROUND DOT 0889..088E ; AL # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL 0890..0891 ; NU # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE -0898..089F ; CM # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +0897..089F ; CM # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; AL # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; AL # Lm ARABIC SMALL FARSI YEH 08CA..08E1 ; CM # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA @@ -449,7 +449,7 @@ 0C4A..0C4D ; CM # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; CM # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; AL # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; AL # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; AL # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; AL # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; CM # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; NU # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -475,7 +475,7 @@ 0CCA..0CCB ; CM # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; CM # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; CM # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; AL # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; AL # Lo [3] KANNADA 
ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; AL # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; CM # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; NU # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE @@ -832,7 +832,7 @@ 1C5A..1C77 ; AL # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; AL # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1C7E..1C7F ; BA # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD -1C80..1C88 ; AL # Ll [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C8A ; AL # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE 1C90..1CBA ; AL # Lu [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; AL # Lu [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1CC0..1CC7 ; AL # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA @@ -2721,6 +2721,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 105A3..105B1 ; AL # Ll [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE 105B3..105B9 ; AL # Ll [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; AL # Ll [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; AL # Lo [52] TODHRI LETTER A..TODHRI LETTER OO 10600..10736 ; AL # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; AL # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; AL # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 @@ -2795,12 +2796,23 @@ FFFD ; AI # So REPLACEMENT CHARACTER 10D00..10D23 ; AL # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA 10D24..10D27 ; CM # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D30..10D39 ; NU # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10D40..10D49 ; NU # Nd [10] GARAY DIGIT ZERO..GARAY DIGIT NINE +10D4A..10D4D ; AL # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E 
; AL # Lm GARAY VOWEL LENGTH MARK +10D4F ; AL # Lo GARAY SUKUN +10D50..10D65 ; AL # Lu [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D69..10D6D ; CM # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK +10D6E ; BA # Pd GARAY HYPHEN +10D6F ; AL # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; AL # Ll [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA +10D8E..10D8F ; AL # Sm [2] GARAY PLUS SIGN..GARAY MINUS SIGN 10E60..10E7E ; AL # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS 10E80..10EA9 ; AL # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EAB..10EAC ; CM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EAD ; BA # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; AL # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE -10EFD..10EFF ; CM # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EFC..10EFF ; CM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; AL # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; AL # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; AL # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -2994,6 +3006,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 116B8 ; AL # Lo TAKRI LETTER ARCHAIC KHA 116B9 ; AL # Po TAKRI ABBREVIATION SIGN 116C0..116C9 ; NU # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +116D0..116E3 ; NU # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11700..1171A ; SA # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA 1171D..1171F ; SA # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; SA # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA @@ -3071,6 +3084,9 @@ FFFD ; AI # So REPLACEMENT CHARACTER 11AB0..11ABF ; AL # Lo [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA 
11AC0..11AF8 ; AL # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL 11B00..11B09 ; BB # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11BC0..11BE0 ; AL # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO +11BE1 ; AL # Po SUNUWAR SIGN PVO +11BF0..11BF9 ; NU # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 11C00..11C08 ; AL # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E ; AL # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA 11C2F ; CM # Mc BHAIKSUKI VOWEL SIGN AA @@ -3232,6 +3248,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1BC9D..1BC9E ; CM # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK 1BC9F ; BA # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1BCA0..1BCA3 ; CM # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP +1CEB0..1CEB3 ; AL # So [4] HORIZONTAL ZIGZAG LINE..BLACK RIGHT TRIANGLE CARET 1CF00..1CF2D ; CM # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF30..1CF46 ; CM # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG 1CF50..1CFC3 ; AL # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK @@ -3531,7 +3548,8 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1F890..1F8AD ; AL # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS 1F8AE..1F8AF ; ID # Cn [2] .. 1F8B0..1F8B1 ; ID # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST -1F8B2..1F8FF ; ID # Cn [78] .. +1F8B2 ; AL # So RIGHTWARDS ARROW WITH LOWER HOOK +1F8B3..1F8FF ; ID # Cn [77] .. 
1F900..1F90B ; AL # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT 1F90C ; EB # So PINCHED FINGERS 1F90D..1F90E ; ID # So [2] WHITE HEART..BROWN HEART diff --git a/unicodetools/data/ucd/dev/NormalizationTest.txt b/unicodetools/data/ucd/dev/NormalizationTest.txt index 2e8857424..ccd79ae54 100644 --- a/unicodetools/data/ucd/dev/NormalizationTest.txt +++ b/unicodetools/data/ucd/dev/NormalizationTest.txt @@ -1,5 +1,5 @@ -# NormalizationTest-15.1.0.txt -# Date: 2023-01-05, 20:34:44 GMT +# NormalizationTest-16.0.0.txt +# Date: 2023-10-23, 12:24:21 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -15130,6 +15130,8 @@ FFEB;FFEB;FFEB;2192;2192; # (→; →; →; →; →; ) HALFWIDTH RIGHTWARDS ARR FFEC;FFEC;FFEC;2193;2193; # (↓; ↓; ↓; ↓; ↓; ) HALFWIDTH DOWNWARDS ARROW FFED;FFED;FFED;25A0;25A0; # (■; ■; ■; ■; ■; ) HALFWIDTH BLACK SQUARE FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE +105C9;105C9;105D2 0307;105C9;105D2 0307; # (𐗉; 𐗉; 𐗒◌̇; 𐗉; 𐗒◌̇; ) TODHRI LETTER EI +105E4;105E4;105DA 0307;105E4;105DA 0307; # (𐗤; 𐗤; 𐗚◌̇; 𐗤; 𐗚◌̇; ) TODHRI LETTER U 10781;10781;10781;02D0;02D0; # (𐞁; 𐞁; 𐞁; ː; ː; ) MODIFIER LETTER SUPERSCRIPT TRIANGULAR COLON 10782;10782;10782;02D1;02D1; # (𐞂; 𐞂; 𐞂; ˑ; ˑ; ) MODIFIER LETTER SUPERSCRIPT HALF TRIANGULAR COLON 10783;10783;10783;00E6;00E6; # (𐞃; 𐞃; 𐞃; æ; æ; ) MODIFIER LETTER SMALL AE @@ -17664,6 +17666,8 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 0061 085A 059A 0316 1DFA 0062;0061 1DFA 085A 0316 059A 0062;0061 1DFA 085A 0316 059A 0062;0061 1DFA 085A 0316 059A 0062;0061 1DFA 085A 0316 059A 0062; # (a◌࡚◌֚◌̖◌᷺b; a◌᷺◌࡚◌̖◌֚b; a◌᷺◌࡚◌̖◌֚b; a◌᷺◌࡚◌̖◌֚b; a◌᷺◌࡚◌̖◌֚b; ) LATIN SMALL LETTER A, MANDAIC VOCALIZATION MARK, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B 0061 059A 0316 1DFA 
085B 0062;0061 1DFA 0316 085B 059A 0062;0061 1DFA 0316 085B 059A 0062;0061 1DFA 0316 085B 059A 0062;0061 1DFA 0316 085B 059A 0062; # (a◌֚◌̖◌᷺◌࡛b; a◌᷺◌̖◌࡛◌֚b; a◌᷺◌̖◌࡛◌֚b; a◌᷺◌̖◌࡛◌֚b; a◌᷺◌̖◌࡛◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, MANDAIC GEMINATION MARK, LATIN SMALL LETTER B 0061 085B 059A 0316 1DFA 0062;0061 1DFA 085B 0316 059A 0062;0061 1DFA 085B 0316 059A 0062;0061 1DFA 085B 0316 059A 0062;0061 1DFA 085B 0316 059A 0062; # (a◌࡛◌֚◌̖◌᷺b; a◌᷺◌࡛◌̖◌֚b; a◌᷺◌࡛◌̖◌֚b; a◌᷺◌࡛◌̖◌֚b; a◌᷺◌࡛◌̖◌֚b; ) LATIN SMALL LETTER A, MANDAIC GEMINATION MARK, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B +0061 0315 0300 05AE 0897 0062;00E0 05AE 0897 0315 0062;0061 05AE 0300 0897 0315 0062;00E0 05AE 0897 0315 0062;0061 05AE 0300 0897 0315 0062; # (a◌̕◌̀◌֮◌ࢗb; à◌֮◌ࢗ◌̕b; a◌֮◌̀◌ࢗ◌̕b; à◌֮◌ࢗ◌̕b; a◌֮◌̀◌ࢗ◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, ARABIC PEPET, LATIN SMALL LETTER B +0061 0897 0315 0300 05AE 0062;0061 05AE 0897 0300 0315 0062;0061 05AE 0897 0300 0315 0062;0061 05AE 0897 0300 0315 0062;0061 05AE 0897 0300 0315 0062; # (a◌ࢗ◌̕◌̀◌֮b; a◌֮◌ࢗ◌̀◌̕b; a◌֮◌ࢗ◌̀◌̕b; a◌֮◌ࢗ◌̀◌̕b; a◌֮◌ࢗ◌̀◌̕b; ) LATIN SMALL LETTER A, ARABIC PEPET, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 0315 0300 05AE 0898 0062;00E0 05AE 0898 0315 0062;0061 05AE 0300 0898 0315 0062;00E0 05AE 0898 0315 0062;0061 05AE 0300 0898 0315 0062; # (a◌̕◌̀◌֮◌࢘b; à◌֮◌࢘◌̕b; a◌֮◌̀◌࢘◌̕b; à◌֮◌࢘◌̕b; a◌֮◌̀◌࢘◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, ARABIC SMALL HIGH WORD AL-JUZ, LATIN SMALL LETTER B 0061 0898 0315 0300 05AE 0062;0061 05AE 0898 0300 0315 0062;0061 05AE 0898 0300 0315 0062;0061 05AE 0898 0300 0315 0062;0061 05AE 0898 0300 0315 0062; # (a◌࢘◌̕◌̀◌֮b; a◌֮◌࢘◌̀◌̕b; a◌֮◌࢘◌̀◌̕b; a◌֮◌࢘◌̀◌̕b; a◌֮◌࢘◌̀◌̕b; ) LATIN SMALL LETTER A, ARABIC SMALL HIGH WORD 
AL-JUZ, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 059A 0316 1DFA 0899 0062;0061 1DFA 0316 0899 059A 0062;0061 1DFA 0316 0899 059A 0062;0061 1DFA 0316 0899 059A 0062;0061 1DFA 0316 0899 059A 0062; # (a◌֚◌̖◌᷺◌࢙b; a◌᷺◌̖◌࢙◌֚b; a◌᷺◌̖◌࢙◌֚b; a◌᷺◌̖◌࢙◌֚b; a◌᷺◌̖◌࢙◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC SMALL LOW WORD ISHMAAM, LATIN SMALL LETTER B @@ -18554,6 +18558,16 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 0061 10D26 0315 0300 05AE 0062;0061 05AE 10D26 0300 0315 0062;0061 05AE 10D26 0300 0315 0062;0061 05AE 10D26 0300 0315 0062;0061 05AE 10D26 0300 0315 0062; # (a◌𐴦◌̕◌̀◌֮b; a◌֮◌𐴦◌̀◌̕b; a◌֮◌𐴦◌̀◌̕b; a◌֮◌𐴦◌̀◌̕b; a◌֮◌𐴦◌̀◌̕b; ) LATIN SMALL LETTER A, HANIFI ROHINGYA SIGN TANA, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 0315 0300 05AE 10D27 0062;00E0 05AE 10D27 0315 0062;0061 05AE 0300 10D27 0315 0062;00E0 05AE 10D27 0315 0062;0061 05AE 0300 10D27 0315 0062; # (a◌̕◌̀◌֮◌𐴧b; à◌֮◌𐴧◌̕b; a◌֮◌̀◌𐴧◌̕b; à◌֮◌𐴧◌̕b; a◌֮◌̀◌𐴧◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, HANIFI ROHINGYA SIGN TASSI, LATIN SMALL LETTER B 0061 10D27 0315 0300 05AE 0062;0061 05AE 10D27 0300 0315 0062;0061 05AE 10D27 0300 0315 0062;0061 05AE 10D27 0300 0315 0062;0061 05AE 10D27 0300 0315 0062; # (a◌𐴧◌̕◌̀◌֮b; a◌֮◌𐴧◌̀◌̕b; a◌֮◌𐴧◌̀◌̕b; a◌֮◌𐴧◌̀◌̕b; a◌֮◌𐴧◌̀◌̕b; ) LATIN SMALL LETTER A, HANIFI ROHINGYA SIGN TASSI, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 10D69 0062;00E0 05AE 10D69 0315 0062;0061 05AE 0300 10D69 0315 0062;00E0 05AE 10D69 0315 0062;0061 05AE 0300 10D69 0315 0062; # (a◌̕◌̀◌֮◌𐵩b; à◌֮◌𐵩◌̕b; a◌֮◌̀◌𐵩◌̕b; à◌֮◌𐵩◌̕b; a◌֮◌̀◌𐵩◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, GARAY VOWEL SIGN E, LATIN SMALL LETTER B +0061 
10D69 0315 0300 05AE 0062;0061 05AE 10D69 0300 0315 0062;0061 05AE 10D69 0300 0315 0062;0061 05AE 10D69 0300 0315 0062;0061 05AE 10D69 0300 0315 0062; # (a◌𐵩◌̕◌̀◌֮b; a◌֮◌𐵩◌̀◌̕b; a◌֮◌𐵩◌̀◌̕b; a◌֮◌𐵩◌̀◌̕b; a◌֮◌𐵩◌̀◌̕b; ) LATIN SMALL LETTER A, GARAY VOWEL SIGN E, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 10D6A 0062;00E0 05AE 10D6A 0315 0062;0061 05AE 0300 10D6A 0315 0062;00E0 05AE 10D6A 0315 0062;0061 05AE 0300 10D6A 0315 0062; # (a◌̕◌̀◌֮◌𐵪b; à◌֮◌𐵪◌̕b; a◌֮◌̀◌𐵪◌̕b; à◌֮◌𐵪◌̕b; a◌֮◌̀◌𐵪◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, GARAY CONSONANT GEMINATION MARK, LATIN SMALL LETTER B +0061 10D6A 0315 0300 05AE 0062;0061 05AE 10D6A 0300 0315 0062;0061 05AE 10D6A 0300 0315 0062;0061 05AE 10D6A 0300 0315 0062;0061 05AE 10D6A 0300 0315 0062; # (a◌𐵪◌̕◌̀◌֮b; a◌֮◌𐵪◌̀◌̕b; a◌֮◌𐵪◌̀◌̕b; a◌֮◌𐵪◌̀◌̕b; a◌֮◌𐵪◌̀◌̕b; ) LATIN SMALL LETTER A, GARAY CONSONANT GEMINATION MARK, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 10D6B 0062;00E0 05AE 10D6B 0315 0062;0061 05AE 0300 10D6B 0315 0062;00E0 05AE 10D6B 0315 0062;0061 05AE 0300 10D6B 0315 0062; # (a◌̕◌̀◌֮◌𐵫b; à◌֮◌𐵫◌̕b; a◌֮◌̀◌𐵫◌̕b; à◌֮◌𐵫◌̕b; a◌֮◌̀◌𐵫◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, GARAY COMBINING DOT ABOVE, LATIN SMALL LETTER B +0061 10D6B 0315 0300 05AE 0062;0061 05AE 10D6B 0300 0315 0062;0061 05AE 10D6B 0300 0315 0062;0061 05AE 10D6B 0300 0315 0062;0061 05AE 10D6B 0300 0315 0062; # (a◌𐵫◌̕◌̀◌֮b; a◌֮◌𐵫◌̀◌̕b; a◌֮◌𐵫◌̀◌̕b; a◌֮◌𐵫◌̀◌̕b; a◌֮◌𐵫◌̀◌̕b; ) LATIN SMALL LETTER A, GARAY COMBINING DOT ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 10D6C 0062;00E0 05AE 10D6C 0315 0062;0061 05AE 0300 10D6C 0315 0062;00E0 05AE 10D6C 0315 0062;0061 05AE 0300 10D6C 0315 0062; # (a◌̕◌̀◌֮◌𐵬b; à◌֮◌𐵬◌̕b; a◌֮◌̀◌𐵬◌̕b; à◌֮◌𐵬◌̕b; 
a◌֮◌̀◌𐵬◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, GARAY COMBINING DOUBLE DOT ABOVE, LATIN SMALL LETTER B +0061 10D6C 0315 0300 05AE 0062;0061 05AE 10D6C 0300 0315 0062;0061 05AE 10D6C 0300 0315 0062;0061 05AE 10D6C 0300 0315 0062;0061 05AE 10D6C 0300 0315 0062; # (a◌𐵬◌̕◌̀◌֮b; a◌֮◌𐵬◌̀◌̕b; a◌֮◌𐵬◌̀◌̕b; a◌֮◌𐵬◌̀◌̕b; a◌֮◌𐵬◌̀◌̕b; ) LATIN SMALL LETTER A, GARAY COMBINING DOUBLE DOT ABOVE, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 0315 0300 05AE 10D6D 0062;00E0 05AE 10D6D 0315 0062;0061 05AE 0300 10D6D 0315 0062;00E0 05AE 10D6D 0315 0062;0061 05AE 0300 10D6D 0315 0062; # (a◌̕◌̀◌֮◌𐵭b; à◌֮◌𐵭◌̕b; a◌֮◌̀◌𐵭◌̕b; à◌֮◌𐵭◌̕b; a◌֮◌̀◌𐵭◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, GARAY CONSONANT NASALIZATION MARK, LATIN SMALL LETTER B +0061 10D6D 0315 0300 05AE 0062;0061 05AE 10D6D 0300 0315 0062;0061 05AE 10D6D 0300 0315 0062;0061 05AE 10D6D 0300 0315 0062;0061 05AE 10D6D 0300 0315 0062; # (a◌𐵭◌̕◌̀◌֮b; a◌֮◌𐵭◌̀◌̕b; a◌֮◌𐵭◌̀◌̕b; a◌֮◌𐵭◌̀◌̕b; a◌֮◌𐵭◌̀◌̕b; ) LATIN SMALL LETTER A, GARAY CONSONANT NASALIZATION MARK, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 0315 0300 05AE 10EAB 0062;00E0 05AE 10EAB 0315 0062;0061 05AE 0300 10EAB 0315 0062;00E0 05AE 10EAB 0315 0062;0061 05AE 0300 10EAB 0315 0062; # (a◌̕◌̀◌֮◌𐺫b; à◌֮◌𐺫◌̕b; a◌֮◌̀◌𐺫◌̕b; à◌֮◌𐺫◌̕b; a◌֮◌̀◌𐺫◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, YEZIDI COMBINING HAMZA MARK, LATIN SMALL LETTER B 0061 10EAB 0315 0300 05AE 0062;0061 05AE 10EAB 0300 0315 0062;0061 05AE 10EAB 0300 0315 0062;0061 05AE 10EAB 0300 0315 0062;0061 05AE 10EAB 0300 0315 0062; # (a◌𐺫◌̕◌̀◌֮b; a◌֮◌𐺫◌̀◌̕b; a◌֮◌𐺫◌̀◌̕b; a◌֮◌𐺫◌̀◌̕b; a◌֮◌𐺫◌̀◌̕b; ) LATIN SMALL LETTER A, YEZIDI COMBINING HAMZA MARK, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 
0315 0300 05AE 10EAC 0062;00E0 05AE 10EAC 0315 0062;0061 05AE 0300 10EAC 0315 0062;00E0 05AE 10EAC 0315 0062;0061 05AE 0300 10EAC 0315 0062; # (a◌̕◌̀◌֮◌𐺬b; à◌֮◌𐺬◌̕b; a◌֮◌̀◌𐺬◌̕b; à◌֮◌𐺬◌̕b; a◌֮◌̀◌𐺬◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, YEZIDI COMBINING MADDA MARK, LATIN SMALL LETTER B diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 777e8a288..2fa585a17 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ -# PropList-15.1.0.txt -# Date: 2023-08-01, 21:56:53 GMT +# PropList-16.0.0.txt +# Date: 2023-10-23, 12:20:11 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -62,9 +62,10 @@ FE31..FE32 ; Dash # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTA FE58 ; Dash # Pd SMALL EM DASH FE63 ; Dash # Pd SMALL HYPHEN-MINUS FF0D ; Dash # Pd FULLWIDTH HYPHEN-MINUS +10D6E ; Dash # Pd GARAY HYPHEN 10EAD ; Dash # Pd YEZIDI HYPHENATION MARK -# Total code points: 30 +# Total code points: 31 # ================================================ @@ -450,6 +451,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 081B..0823 ; Other_Alphabetic # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A 0825..0827 ; Other_Alphabetic # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082C ; Other_Alphabetic # Mn [4] SAMARITAN VOWEL SIGN LONG I..SAMARITAN VOWEL SIGN SUKUN +0897 ; Other_Alphabetic # Mn ARABIC PEPET 08D4..08DF ; Other_Alphabetic # Mn [12] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH WORD WAQFA 08E3..08E9 ; Other_Alphabetic # Mn [7] ARABIC TURNED DAMMA BELOW..ARABIC CURLY KASRATAN 08F0..0902 ; Other_Alphabetic # Mn [19] ARABIC OPEN FATHATAN..DEVANAGARI SIGN ANUSVARA @@ -689,7 +691,9 @@ FB1E ; 
Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 10A05..10A06 ; Other_Alphabetic # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O 10A0C..10A0F ; Other_Alphabetic # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA 10D24..10D27 ; Other_Alphabetic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D69 ; Other_Alphabetic # Mn GARAY VOWEL SIGN E 10EAB..10EAC ; Other_Alphabetic # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EFC ; Other_Alphabetic # Mn ARABIC COMBINING ALEF OVERLAY 11000 ; Other_Alphabetic # Mc BRAHMI SIGN CANDRABINDU 11001 ; Other_Alphabetic # Mn BRAHMI SIGN ANUSVARA 11002 ; Other_Alphabetic # Mc BRAHMI SIGN VISARGA @@ -834,7 +838,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1425 +# Total code points: 1428 # ================================================ @@ -1042,6 +1046,8 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 10AE5..10AE6 ; Diacritic # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 10D22..10D23 ; Diacritic # Lo [2] HANIFI ROHINGYA MARK SAKIN..HANIFI ROHINGYA MARK NA KHONNA 10D24..10D27 ; Diacritic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D4E ; Diacritic # Lm GARAY VOWEL LENGTH MARK +10D69..10D6D ; Diacritic # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EFD..10EFF ; Diacritic # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Diacritic # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Diacritic # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW @@ -1103,7 +1109,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1E944..1E946 ; Diacritic 
# Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK 1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA -# Total code points: 1144 +# Total code points: 1150 # ================================================ @@ -1132,6 +1138,9 @@ AADD ; Extender # Lm TAI VIET SYMBOL SAM AAF3..AAF4 ; Extender # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK 10781..10782 ; Extender # Lm [2] MODIFIER LETTER SUPERSCRIPT TRIANGULAR COLON..MODIFIER LETTER SUPERSCRIPT HALF TRIANGULAR COLON +10D4E ; Extender # Lm GARAY VOWEL LENGTH MARK +10D6A ; Extender # Mn GARAY CONSONANT GEMINATION MARK +10D6F ; Extender # Lm GARAY REDUPLICATION MARK 1135D ; Extender # Lo GRANTHA SIGN PLUTA 115C6..115C8 ; Extender # Po [3] SIDDHAM REPETITION MARK-1..SIDDHAM REPETITION MARK-3 11A98 ; Extender # Mn SOYOMBO GEMINATION MARK @@ -1141,7 +1150,7 @@ FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND 1E13C..1E13D ; Extender # Lm [2] NYIAKENG PUACHUE HMONG SIGN XW XW..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER 1E944..1E946 ; Extender # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK -# Total code points: 50 +# Total code points: 53 # ================================================ diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index 9039e9eb2..82f13eefd 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -1,5 +1,5 @@ -# PropertyValueAliases-15.1.0.txt -# Date: 2023-08-07, 15:21:34 GMT +# PropertyValueAliases-16.0.0.txt +# Date: 2023-10-23, 18:45:09 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use, see https://www.unicode.org/terms_of_use.html @@ -258,6 +258,7 @@ blk; Ethiopic_Ext ; Ethiopic_Extended blk; Ethiopic_Ext_A ; Ethiopic_Extended_A blk; Ethiopic_Ext_B ; Ethiopic_Extended_B blk; Ethiopic_Sup ; Ethiopic_Supplement +blk; Garay ; Garay blk; Geometric_Shapes ; Geometric_Shapes blk; Geometric_Shapes_Ext ; Geometric_Shapes_Extended blk; Georgian ; Georgian @@ -368,6 +369,7 @@ blk; Music ; Musical_Symbols blk; Myanmar ; Myanmar blk; Myanmar_Ext_A ; Myanmar_Extended_A blk; Myanmar_Ext_B ; Myanmar_Extended_B +blk; Myanmar_Ext_C ; Myanmar_Extended_C blk; Nabataean ; Nabataean blk; Nag_Mundari ; Nag_Mundari blk; Nandinagari ; Nandinagari @@ -426,6 +428,7 @@ blk; Soyombo ; Soyombo blk; Specials ; Specials blk; Sundanese ; Sundanese blk; Sundanese_Sup ; Sundanese_Supplement +blk; Sunuwar ; Sunuwar blk; Sup_Arrows_A ; Supplemental_Arrows_A blk; Sup_Arrows_B ; Supplemental_Arrows_B blk; Sup_Arrows_C ; Supplemental_Arrows_C @@ -439,6 +442,7 @@ blk; Sutton_SignWriting ; Sutton_SignWriting blk; Syloti_Nagri ; Syloti_Nagri blk; Symbols_And_Pictographs_Ext_A ; Symbols_And_Pictographs_Extended_A blk; Symbols_For_Legacy_Computing ; Symbols_For_Legacy_Computing +blk; Symbols_For_Legacy_Computing_Sup ; Symbols_For_Legacy_Computing_Supplement blk; Syriac ; Syriac blk; Syriac_Sup ; Syriac_Supplement blk; Tagalog ; Tagalog @@ -461,6 +465,7 @@ blk; Thai ; Thai blk; Tibetan ; Tibetan blk; Tifinagh ; Tifinagh blk; Tirhuta ; Tirhuta +blk; Todhri ; Todhri blk; Toto ; Toto blk; Transport_And_Map ; Transport_And_Map_Symbols blk; UCAS ; Unified_Canadian_Aboriginal_Syllabics; Canadian_Syllabics @@ -1327,6 +1332,7 @@ sc ; Egyp ; Egyptian_Hieroglyphs sc ; Elba ; Elbasan sc ; Elym ; Elymaic sc ; Ethi ; Ethiopic +sc ; Gara ; Garay sc ; Geor ; Georgian sc ; Glag ; Glagolitic sc ; Gong ; Gunjala_Gondi @@ -1424,6 +1430,7 @@ sc ; Sogo ; Old_Sogdian sc ; Sora ; Sora_Sompeng sc ; Soyo ; Soyombo sc ; Sund ; Sundanese +sc ; Sunu ; Sunuwar sc ; Sylo ; Syloti_Nagri sc ; Syrc ; 
Syriac sc ; Tagb ; Tagbanwa @@ -1441,6 +1448,7 @@ sc ; Thai ; Thai sc ; Tibt ; Tibetan sc ; Tirh ; Tirhuta sc ; Tnsa ; Tangsa +sc ; Todr ; Todhri sc ; Toto ; Toto sc ; Ugar ; Ugaritic sc ; Vaii ; Vai diff --git a/unicodetools/data/ucd/dev/ScriptExtensions.txt b/unicodetools/data/ucd/dev/ScriptExtensions.txt index 23141fb82..99ae29159 100644 --- a/unicodetools/data/ucd/dev/ScriptExtensions.txt +++ b/unicodetools/data/ucd/dev/ScriptExtensions.txt @@ -1,5 +1,5 @@ -# ScriptExtensions-15.1.0.txt -# Date: 2023-02-01, 23:02:24 GMT +# ScriptExtensions-16.0.0.txt +# Date: 2023-10-09, 14:49:47 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -497,15 +497,6 @@ FE45..FE46 ; Bopo Hang Hani Hira Kana # Po [2] SESAME DOT..WHITE SESAME DOT # ================================================ -# Script_Extensions=Arab Nkoo Rohg Syrc Thaa Yezi - -060C ; Arab Nkoo Rohg Syrc Thaa Yezi # Po ARABIC COMMA -061B ; Arab Nkoo Rohg Syrc Thaa Yezi # Po ARABIC SEMICOLON - -# Total code points: 2 - -# ================================================ - # Script_Extensions=Bopo Hang Hani Hira Kana Yiii 3001..3002 ; Bopo Hang Hani Hira Kana Yiii # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP @@ -545,9 +536,18 @@ FF64..FF65 ; Bopo Hang Hani Hira Kana Yiii # Po [2] HALFWIDTH IDEOGRAPHIC C # ================================================ -# Script_Extensions=Adlm Arab Nkoo Rohg Syrc Thaa Yezi +# Script_Extensions=Arab Gara Nkoo Rohg Syrc Thaa Yezi + +060C ; Arab Gara Nkoo Rohg Syrc Thaa Yezi # Po ARABIC COMMA +061B ; Arab Gara Nkoo Rohg Syrc Thaa Yezi # Po ARABIC SEMICOLON + +# Total code points: 2 + +# ================================================ + +# Script_Extensions=Adlm Arab Gara Nkoo Rohg Syrc Thaa Yezi -061F ; Adlm Arab Nkoo Rohg Syrc Thaa Yezi # Po ARABIC QUESTION MARK +061F ; Adlm Arab Gara Nkoo Rohg Syrc Thaa Yezi # Po ARABIC 
QUESTION MARK # Total code points: 1 diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 4fd75656a..6218f762a 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-16.0.0.txt -# Date: 2023-10-03, 12:05:49 GMT +# Date: 2023-10-24, 09:19:59 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -522,6 +522,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR 101D0..101FC ; Common # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND 102E1..102FB ; Common # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED 1BCA0..1BCA3 ; Common # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP +1CEB0..1CEB3 ; Common # So [4] HORIZONTAL ZIGZAG LINE..BLACK RIGHT TRIANGLE CARET 1CF50..1CFC3 ; Common # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK 1D000..1D0F5 ; Common # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; Common # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -614,7 +615,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR 1F850..1F859 ; Common # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW 1F860..1F887 ; Common # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW 1F890..1F8AD ; Common # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS -1F8B0..1F8B1 ; Common # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST +1F8B0..1F8B2 ; Common # So [3] ARROW POINTING UPWARDS THEN NORTH WEST..RIGHTWARDS ARROW WITH LOWER HOOK 1F900..1FA53 ; Common # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP 
1FA60..1FA6D ; Common # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; Common # So [13] BALLET SHOES..CRUTCH @@ -630,7 +631,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR E0001 ; Common # Cf LANGUAGE TAG E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 8306 +# Total code points: 8311 # ================================================ @@ -769,7 +770,7 @@ AB65 ; Greek # L& GREEK LETTER SMALL CAPITAL OMEGA 0487 ; Cyrillic # Mn COMBINING CYRILLIC POKRYTIE 0488..0489 ; Cyrillic # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN 048A..052F ; Cyrillic # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER -1C80..1C88 ; Cyrillic # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C8A ; Cyrillic # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE 1D2B ; Cyrillic # L& CYRILLIC LETTER SMALL CAPITAL EL 1D78 ; Cyrillic # Lm MODIFIER LETTER CYRILLIC EN 2DE0..2DFF ; Cyrillic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS @@ -788,7 +789,7 @@ FE2E..FE2F ; Cyrillic # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBININ 1E030..1E06D ; Cyrillic # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E08F ; Cyrillic # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I -# Total code points: 506 +# Total code points: 508 # ================================================ @@ -868,7 +869,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU 0888 ; Arabic # Sk ARABIC RAISED ROUND DOT 0889..088E ; Arabic # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL 0890..0891 ; Arabic # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE -0898..089F ; Arabic # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +0897..089F ; Arabic # Mn [9] ARABIC 
PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; Arabic # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; Arabic # Lm ARABIC SMALL FARSI YEH 08CA..08E1 ; Arabic # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA @@ -886,7 +887,8 @@ FDFD..FDFF ; Arabic # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM. FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM 10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS -10EFD..10EFF ; Arabic # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10EC2..10EC4 ; Arabic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EFC..10EFF ; Arabic # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 1EE00..1EE03 ; Arabic # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL 1EE05..1EE1F ; Arabic # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF 1EE21..1EE22 ; Arabic # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM @@ -922,7 +924,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA 1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN 1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -# Total code points: 1368 +# Total code points: 1373 # ================================================ @@ -1151,7 +1153,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY 0C4A..0C4D ; Telugu # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; Telugu # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; Telugu # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; Telugu # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; Telugu # Lo [2] TELUGU 
ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; Telugu # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; Telugu # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; Telugu # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -1159,7 +1161,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY 0C78..0C7E ; Telugu # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR 0C7F ; Telugu # So TELUGU SIGN TUUMU -# Total code points: 100 +# Total code points: 101 # ================================================ @@ -1182,14 +1184,14 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY 0CCA..0CCB ; Kannada # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; Kannada # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; Kannada # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; Kannada # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; Kannada # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; Kannada # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; Kannada # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; Kannada # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE 0CF1..0CF2 ; Kannada # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0CF3 ; Kannada # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT -# Total code points: 91 +# Total code points: 92 # ================================================ @@ -1369,8 +1371,9 @@ AA7B ; Myanmar # Mc MYANMAR SIGN PAO KAREN TONE AA7C ; Myanmar # Mn MYANMAR SIGN TAI LAING TONE-2 AA7D ; Myanmar # Mc MYANMAR SIGN TAI LAING TONE-5 AA7E..AA7F ; Myanmar # Lo [2] MYANMAR LETTER SHWE PALAUNG CHA..MYANMAR LETTER SHWE PALAUNG SHA +116D0..116E3 ; Myanmar # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE -# Total code points: 223 +# Total code points: 243 # 
================================================ @@ -3030,4 +3033,33 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI # Total code points: 42 +# ================================================ + +10D40..10D49 ; Garay # Nd [10] GARAY DIGIT ZERO..GARAY DIGIT NINE +10D4A..10D4D ; Garay # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; Garay # Lm GARAY VOWEL LENGTH MARK +10D4F ; Garay # Lo GARAY SUKUN +10D50..10D65 ; Garay # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D69..10D6D ; Garay # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK +10D6E ; Garay # Pd GARAY HYPHEN +10D6F ; Garay # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; Garay # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA +10D8E..10D8F ; Garay # Sm [2] GARAY PLUS SIGN..GARAY MINUS SIGN + +# Total code points: 69 + +# ================================================ + +11BC0..11BE0 ; Sunuwar # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO +11BE1 ; Sunuwar # Po SUNUWAR SIGN PVO +11BF0..11BF9 ; Sunuwar # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE + +# Total code points: 44 + +# ================================================ + +105C0..105F3 ; Todhri # Lo [52] TODHRI LETTER A..TODHRI LETTER OO + +# Total code points: 52 + # EOF diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 3bb1d2fe4..66e4e0a38 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -2123,6 +2123,7 @@ 088E;ARABIC VERTICAL TAIL;Lo;0;AL;;;;;N;;;;; 0890;ARABIC POUND MARK ABOVE;Cf;0;AN;;;;;N;;;;; 0891;ARABIC PIASTRE MARK ABOVE;Cf;0;AN;;;;;N;;;;; +0897;ARABIC PEPET;Mn;230;NSM;;;;;N;;;;; 0898;ARABIC SMALL HIGH WORD AL-JUZ;Mn;230;NSM;;;;;N;;;;; 0899;ARABIC SMALL LOW WORD ISHMAAM;Mn;220;NSM;;;;;N;;;;; 089A;ARABIC SMALL LOW WORD IMAALA;Mn;220;NSM;;;;;N;;;;; @@ -2861,6 +2862,7 @@ 0C58;TELUGU LETTER TSA;Lo;0;L;;;;;N;;;;; 0C59;TELUGU LETTER DZA;Lo;0;L;;;;;N;;;;; 
0C5A;TELUGU LETTER RRRA;Lo;0;L;;;;;N;;;;; +0C5C;TELUGU ARCHAIC SHRII;Lo;0;L;;;;;N;;;;; 0C5D;TELUGU LETTER NAKAARA POLLU;Lo;0;L;;;;;N;;;;; 0C60;TELUGU LETTER VOCALIC RR;Lo;0;L;;;;;N;;;;; 0C61;TELUGU LETTER VOCALIC LL;Lo;0;L;;;;;N;;;;; @@ -2957,6 +2959,7 @@ 0CCD;KANNADA SIGN VIRAMA;Mn;9;NSM;;;;;N;;;;; 0CD5;KANNADA LENGTH MARK;Mc;0;L;;;;;N;;;;; 0CD6;KANNADA AI LENGTH MARK;Mc;0;L;;;;;N;;;;; +0CDC;KANNADA ARCHAIC SHRII;Lo;0;L;;;;;N;;;;; 0CDD;KANNADA LETTER NAKAARA POLLU;Lo;0;L;;;;;N;;;;; 0CDE;KANNADA LETTER FA;Lo;0;L;;;;;N;;;;; 0CE0;KANNADA LETTER VOCALIC RR;Lo;0;L;;;;;N;;;;; @@ -6511,6 +6514,8 @@ 1C86;CYRILLIC SMALL LETTER TALL HARD SIGN;Ll;0;L;;;;;N;;;042A;;042A 1C87;CYRILLIC SMALL LETTER TALL YAT;Ll;0;L;;;;;N;;;0462;;0462 1C88;CYRILLIC SMALL LETTER UNBLENDED UK;Ll;0;L;;;;;N;;;A64A;;A64A +1C89;CYRILLIC CAPITAL LETTER TJE;Lu;0;L;;;;;N;;;;1C8A; +1C8A;CYRILLIC SMALL LETTER TJE;Ll;0;L;;;;;N;;;1C89;;1C89 1C90;GEORGIAN MTAVRULI CAPITAL LETTER AN;Lu;0;L;;;;;N;;;;10D0; 1C91;GEORGIAN MTAVRULI CAPITAL LETTER BAN;Lu;0;L;;;;;N;;;;10D1; 1C92;GEORGIAN MTAVRULI CAPITAL LETTER GAN;Lu;0;L;;;;;N;;;;10D2; @@ -18002,6 +18007,58 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 105B9;VITHKUQI SMALL LETTER XE;Ll;0;L;;;;;N;;;10592;;10592 105BB;VITHKUQI SMALL LETTER Y;Ll;0;L;;;;;N;;;10594;;10594 105BC;VITHKUQI SMALL LETTER ZE;Ll;0;L;;;;;N;;;10595;;10595 +105C0;TODHRI LETTER A;Lo;0;L;;;;;N;;;;; +105C1;TODHRI LETTER AS;Lo;0;L;;;;;N;;;;; +105C2;TODHRI LETTER BA;Lo;0;L;;;;;N;;;;; +105C3;TODHRI LETTER MBA;Lo;0;L;;;;;N;;;;; +105C4;TODHRI LETTER CA;Lo;0;L;;;;;N;;;;; +105C5;TODHRI LETTER CHA;Lo;0;L;;;;;N;;;;; +105C6;TODHRI LETTER DA;Lo;0;L;;;;;N;;;;; +105C7;TODHRI LETTER NDA;Lo;0;L;;;;;N;;;;; +105C8;TODHRI LETTER DHA;Lo;0;L;;;;;N;;;;; +105C9;TODHRI LETTER EI;Lo;0;L;105D2 0307;;;;N;;;;; +105CA;TODHRI LETTER E;Lo;0;L;;;;;N;;;;; +105CB;TODHRI LETTER FA;Lo;0;L;;;;;N;;;;; +105CC;TODHRI LETTER GA;Lo;0;L;;;;;N;;;;; +105CD;TODHRI LETTER NGA;Lo;0;L;;;;;N;;;;; +105CE;TODHRI LETTER GJA;Lo;0;L;;;;;N;;;;; 
+105CF;TODHRI LETTER NGJA;Lo;0;L;;;;;N;;;;; +105D0;TODHRI LETTER HA;Lo;0;L;;;;;N;;;;; +105D1;TODHRI LETTER HJA;Lo;0;L;;;;;N;;;;; +105D2;TODHRI LETTER I;Lo;0;L;;;;;N;;;;; +105D3;TODHRI LETTER JA;Lo;0;L;;;;;N;;;;; +105D4;TODHRI LETTER KA;Lo;0;L;;;;;N;;;;; +105D5;TODHRI LETTER LA;Lo;0;L;;;;;N;;;;; +105D6;TODHRI LETTER LLA;Lo;0;L;;;;;N;;;;; +105D7;TODHRI LETTER MA;Lo;0;L;;;;;N;;;;; +105D8;TODHRI LETTER NA;Lo;0;L;;;;;N;;;;; +105D9;TODHRI LETTER NJAN;Lo;0;L;;;;;N;;;;; +105DA;TODHRI LETTER O;Lo;0;L;;;;;N;;;;; +105DB;TODHRI LETTER PA;Lo;0;L;;;;;N;;;;; +105DC;TODHRI LETTER QA;Lo;0;L;;;;;N;;;;; +105DD;TODHRI LETTER RA;Lo;0;L;;;;;N;;;;; +105DE;TODHRI LETTER RRA;Lo;0;L;;;;;N;;;;; +105DF;TODHRI LETTER SA;Lo;0;L;;;;;N;;;;; +105E0;TODHRI LETTER SHA;Lo;0;L;;;;;N;;;;; +105E1;TODHRI LETTER SHTA;Lo;0;L;;;;;N;;;;; +105E2;TODHRI LETTER TA;Lo;0;L;;;;;N;;;;; +105E3;TODHRI LETTER THA;Lo;0;L;;;;;N;;;;; +105E4;TODHRI LETTER U;Lo;0;L;105DA 0307;;;;N;;;;; +105E5;TODHRI LETTER VA;Lo;0;L;;;;;N;;;;; +105E6;TODHRI LETTER XA;Lo;0;L;;;;;N;;;;; +105E7;TODHRI LETTER NXA;Lo;0;L;;;;;N;;;;; +105E8;TODHRI LETTER XHA;Lo;0;L;;;;;N;;;;; +105E9;TODHRI LETTER NXHA;Lo;0;L;;;;;N;;;;; +105EA;TODHRI LETTER Y;Lo;0;L;;;;;N;;;;; +105EB;TODHRI LETTER JY;Lo;0;L;;;;;N;;;;; +105EC;TODHRI LETTER ZA;Lo;0;L;;;;;N;;;;; +105ED;TODHRI LETTER ZHA;Lo;0;L;;;;;N;;;;; +105EE;TODHRI LETTER GHA;Lo;0;L;;;;;N;;;;; +105EF;TODHRI LETTER STA;Lo;0;L;;;;;N;;;;; +105F0;TODHRI LETTER SKAN;Lo;0;L;;;;;N;;;;; +105F1;TODHRI LETTER KHA;Lo;0;L;;;;;N;;;;; +105F2;TODHRI LETTER PSA;Lo;0;L;;;;;N;;;;; +105F3;TODHRI LETTER OO;Lo;0;L;;;;;N;;;;; 10600;LINEAR A SIGN AB001;Lo;0;L;;;;;N;;;;; 10601;LINEAR A SIGN AB002;Lo;0;L;;;;;N;;;;; 10602;LINEAR A SIGN AB003;Lo;0;L;;;;;N;;;;; @@ -19323,6 +19380,75 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 10D37;HANIFI ROHINGYA DIGIT SEVEN;Nd;0;AN;;7;7;7;N;;;;; 10D38;HANIFI ROHINGYA DIGIT EIGHT;Nd;0;AN;;8;8;8;N;;;;; 10D39;HANIFI ROHINGYA DIGIT NINE;Nd;0;AN;;9;9;9;N;;;;; +10D40;GARAY DIGIT ZERO;Nd;0;AN;;0;0;0;N;;;;; 
+10D41;GARAY DIGIT ONE;Nd;0;AN;;1;1;1;N;;;;; +10D42;GARAY DIGIT TWO;Nd;0;AN;;2;2;2;N;;;;; +10D43;GARAY DIGIT THREE;Nd;0;AN;;3;3;3;N;;;;; +10D44;GARAY DIGIT FOUR;Nd;0;AN;;4;4;4;N;;;;; +10D45;GARAY DIGIT FIVE;Nd;0;AN;;5;5;5;N;;;;; +10D46;GARAY DIGIT SIX;Nd;0;AN;;6;6;6;N;;;;; +10D47;GARAY DIGIT SEVEN;Nd;0;AN;;7;7;7;N;;;;; +10D48;GARAY DIGIT EIGHT;Nd;0;AN;;8;8;8;N;;;;; +10D49;GARAY DIGIT NINE;Nd;0;AN;;9;9;9;N;;;;; +10D4A;GARAY VOWEL SIGN A;Lo;0;R;;;;;N;;;;; +10D4B;GARAY VOWEL SIGN I;Lo;0;R;;;;;N;;;;; +10D4C;GARAY VOWEL SIGN O;Lo;0;R;;;;;N;;;;; +10D4D;GARAY VOWEL SIGN EE;Lo;0;R;;;;;N;;;;; +10D4E;GARAY VOWEL LENGTH MARK;Lm;0;R;;;;;N;;;;; +10D4F;GARAY SUKUN;Lo;0;R;;;;;N;;;;; +10D50;GARAY CAPITAL LETTER A;Lu;0;R;;;;;N;;;;10D70; +10D51;GARAY CAPITAL LETTER CA;Lu;0;R;;;;;N;;;;10D71; +10D52;GARAY CAPITAL LETTER MA;Lu;0;R;;;;;N;;;;10D72; +10D53;GARAY CAPITAL LETTER KA;Lu;0;R;;;;;N;;;;10D73; +10D54;GARAY CAPITAL LETTER BA;Lu;0;R;;;;;N;;;;10D74; +10D55;GARAY CAPITAL LETTER JA;Lu;0;R;;;;;N;;;;10D75; +10D56;GARAY CAPITAL LETTER SA;Lu;0;R;;;;;N;;;;10D76; +10D57;GARAY CAPITAL LETTER WA;Lu;0;R;;;;;N;;;;10D77; +10D58;GARAY CAPITAL LETTER LA;Lu;0;R;;;;;N;;;;10D78; +10D59;GARAY CAPITAL LETTER GA;Lu;0;R;;;;;N;;;;10D79; +10D5A;GARAY CAPITAL LETTER DA;Lu;0;R;;;;;N;;;;10D7A; +10D5B;GARAY CAPITAL LETTER XA;Lu;0;R;;;;;N;;;;10D7B; +10D5C;GARAY CAPITAL LETTER YA;Lu;0;R;;;;;N;;;;10D7C; +10D5D;GARAY CAPITAL LETTER TA;Lu;0;R;;;;;N;;;;10D7D; +10D5E;GARAY CAPITAL LETTER RA;Lu;0;R;;;;;N;;;;10D7E; +10D5F;GARAY CAPITAL LETTER NYA;Lu;0;R;;;;;N;;;;10D7F; +10D60;GARAY CAPITAL LETTER FA;Lu;0;R;;;;;N;;;;10D80; +10D61;GARAY CAPITAL LETTER NA;Lu;0;R;;;;;N;;;;10D81; +10D62;GARAY CAPITAL LETTER PA;Lu;0;R;;;;;N;;;;10D82; +10D63;GARAY CAPITAL LETTER HA;Lu;0;R;;;;;N;;;;10D83; +10D64;GARAY CAPITAL LETTER OLD KA;Lu;0;R;;;;;N;;;;10D84; +10D65;GARAY CAPITAL LETTER OLD NA;Lu;0;R;;;;;N;;;;10D85; +10D69;GARAY VOWEL SIGN E;Mn;230;NSM;;;;;N;;;;; +10D6A;GARAY CONSONANT GEMINATION MARK;Mn;230;NSM;;;;;N;;;;; +10D6B;GARAY 
COMBINING DOT ABOVE;Mn;230;NSM;;;;;N;;;;; +10D6C;GARAY COMBINING DOUBLE DOT ABOVE;Mn;230;NSM;;;;;N;;;;; +10D6D;GARAY CONSONANT NASALIZATION MARK;Mn;230;NSM;;;;;N;;;;; +10D6E;GARAY HYPHEN;Pd;0;ON;;;;;N;;;;; +10D6F;GARAY REDUPLICATION MARK;Lm;0;R;;;;;N;;;;; +10D70;GARAY SMALL LETTER A;Ll;0;R;;;;;N;;;10D50;;10D50 +10D71;GARAY SMALL LETTER CA;Ll;0;R;;;;;N;;;10D51;;10D51 +10D72;GARAY SMALL LETTER MA;Ll;0;R;;;;;N;;;10D52;;10D52 +10D73;GARAY SMALL LETTER KA;Ll;0;R;;;;;N;;;10D53;;10D53 +10D74;GARAY SMALL LETTER BA;Ll;0;R;;;;;N;;;10D54;;10D54 +10D75;GARAY SMALL LETTER JA;Ll;0;R;;;;;N;;;10D55;;10D55 +10D76;GARAY SMALL LETTER SA;Ll;0;R;;;;;N;;;10D56;;10D56 +10D77;GARAY SMALL LETTER WA;Ll;0;R;;;;;N;;;10D57;;10D57 +10D78;GARAY SMALL LETTER LA;Ll;0;R;;;;;N;;;10D58;;10D58 +10D79;GARAY SMALL LETTER GA;Ll;0;R;;;;;N;;;10D59;;10D59 +10D7A;GARAY SMALL LETTER DA;Ll;0;R;;;;;N;;;10D5A;;10D5A +10D7B;GARAY SMALL LETTER XA;Ll;0;R;;;;;N;;;10D5B;;10D5B +10D7C;GARAY SMALL LETTER YA;Ll;0;R;;;;;N;;;10D5C;;10D5C +10D7D;GARAY SMALL LETTER TA;Ll;0;R;;;;;N;;;10D5D;;10D5D +10D7E;GARAY SMALL LETTER RA;Ll;0;R;;;;;N;;;10D5E;;10D5E +10D7F;GARAY SMALL LETTER NYA;Ll;0;R;;;;;N;;;10D5F;;10D5F +10D80;GARAY SMALL LETTER FA;Ll;0;R;;;;;N;;;10D60;;10D60 +10D81;GARAY SMALL LETTER NA;Ll;0;R;;;;;N;;;10D61;;10D61 +10D82;GARAY SMALL LETTER PA;Ll;0;R;;;;;N;;;10D62;;10D62 +10D83;GARAY SMALL LETTER HA;Ll;0;R;;;;;N;;;10D63;;10D63 +10D84;GARAY SMALL LETTER OLD KA;Ll;0;R;;;;;N;;;10D64;;10D64 +10D85;GARAY SMALL LETTER OLD NA;Ll;0;R;;;;;N;;;10D65;;10D65 +10D8E;GARAY PLUS SIGN;Sm;0;R;;;;;N;;;;; +10D8F;GARAY MINUS SIGN;Sm;0;R;;;;;N;;;;; 10E60;RUMI DIGIT ONE;No;0;AN;;;1;1;N;;;;; 10E61;RUMI DIGIT TWO;No;0;AN;;;2;2;N;;;;; 10E62;RUMI DIGIT THREE;No;0;AN;;;3;3;N;;;;; @@ -19401,6 +19527,10 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 10EAD;YEZIDI HYPHENATION MARK;Pd;0;R;;;;;N;;;;; 10EB0;YEZIDI LETTER LAM WITH DOT ABOVE;Lo;0;R;;;;;N;;;;; 10EB1;YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE;Lo;0;R;;;;;N;;;;; +10EC2;ARABIC LETTER DAL WITH 
TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; +10EC3;ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; +10EC4;ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;; +10EFC;ARABIC COMBINING ALEF OVERLAY;Mn;0;NSM;;;;;N;;;;; 10EFD;ARABIC SMALL LOW WORD SAKTA;Mn;220;NSM;;;;;N;;;;; 10EFE;ARABIC SMALL LOW WORD QASR;Mn;220;NSM;;;;;N;;;;; 10EFF;ARABIC SMALL LOW WORD MADDA;Mn;220;NSM;;;;;N;;;;; @@ -20696,6 +20826,26 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 116C7;TAKRI DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; 116C8;TAKRI DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; 116C9;TAKRI DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; +116D0;MYANMAR PAO DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;; +116D1;MYANMAR PAO DIGIT ONE;Nd;0;L;;1;1;1;N;;;;; +116D2;MYANMAR PAO DIGIT TWO;Nd;0;L;;2;2;2;N;;;;; +116D3;MYANMAR PAO DIGIT THREE;Nd;0;L;;3;3;3;N;;;;; +116D4;MYANMAR PAO DIGIT FOUR;Nd;0;L;;4;4;4;N;;;;; +116D5;MYANMAR PAO DIGIT FIVE;Nd;0;L;;5;5;5;N;;;;; +116D6;MYANMAR PAO DIGIT SIX;Nd;0;L;;6;6;6;N;;;;; +116D7;MYANMAR PAO DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; +116D8;MYANMAR PAO DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; +116D9;MYANMAR PAO DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; +116DA;MYANMAR EASTERN PWO KAREN DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;; +116DB;MYANMAR EASTERN PWO KAREN DIGIT ONE;Nd;0;L;;1;1;1;N;;;;; +116DC;MYANMAR EASTERN PWO KAREN DIGIT TWO;Nd;0;L;;2;2;2;N;;;;; +116DD;MYANMAR EASTERN PWO KAREN DIGIT THREE;Nd;0;L;;3;3;3;N;;;;; +116DE;MYANMAR EASTERN PWO KAREN DIGIT FOUR;Nd;0;L;;4;4;4;N;;;;; +116DF;MYANMAR EASTERN PWO KAREN DIGIT FIVE;Nd;0;L;;5;5;5;N;;;;; +116E0;MYANMAR EASTERN PWO KAREN DIGIT SIX;Nd;0;L;;6;6;6;N;;;;; +116E1;MYANMAR EASTERN PWO KAREN DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; +116E2;MYANMAR EASTERN PWO KAREN DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; +116E3;MYANMAR EASTERN PWO KAREN DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; 11700;AHOM LETTER KA;Lo;0;L;;;;;N;;;;; 11701;AHOM LETTER KHA;Lo;0;L;;;;;N;;;;; 11702;AHOM LETTER NGA;Lo;0;L;;;;;N;;;;; @@ -21280,6 +21430,50 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 11B07;DEVANAGARI SIGN 
WESTERN NINE-LIKE BHALE;Po;0;L;;;;;N;;;;; 11B08;DEVANAGARI SIGN REVERSED NINE-LIKE BHALE;Po;0;L;;;;;N;;;;; 11B09;DEVANAGARI SIGN MINDU;Po;0;L;;;;;N;;;;; +11BC0;SUNUWAR LETTER DEVI;Lo;0;L;;;;;N;;;;; +11BC1;SUNUWAR LETTER TASLA;Lo;0;L;;;;;N;;;;; +11BC2;SUNUWAR LETTER EKO;Lo;0;L;;;;;N;;;;; +11BC3;SUNUWAR LETTER IMAR;Lo;0;L;;;;;N;;;;; +11BC4;SUNUWAR LETTER REU;Lo;0;L;;;;;N;;;;; +11BC5;SUNUWAR LETTER UTTHI;Lo;0;L;;;;;N;;;;; +11BC6;SUNUWAR LETTER KIK;Lo;0;L;;;;;N;;;;; +11BC7;SUNUWAR LETTER MA;Lo;0;L;;;;;N;;;;; +11BC8;SUNUWAR LETTER APPHO;Lo;0;L;;;;;N;;;;; +11BC9;SUNUWAR LETTER PIP;Lo;0;L;;;;;N;;;;; +11BCA;SUNUWAR LETTER GIL;Lo;0;L;;;;;N;;;;; +11BCB;SUNUWAR LETTER HAMSO;Lo;0;L;;;;;N;;;;; +11BCC;SUNUWAR LETTER CARMI;Lo;0;L;;;;;N;;;;; +11BCD;SUNUWAR LETTER NAH;Lo;0;L;;;;;N;;;;; +11BCE;SUNUWAR LETTER BUR;Lo;0;L;;;;;N;;;;; +11BCF;SUNUWAR LETTER JYAH;Lo;0;L;;;;;N;;;;; +11BD0;SUNUWAR LETTER LOACHA;Lo;0;L;;;;;N;;;;; +11BD1;SUNUWAR LETTER OTTHI;Lo;0;L;;;;;N;;;;; +11BD2;SUNUWAR LETTER SHYELE;Lo;0;L;;;;;N;;;;; +11BD3;SUNUWAR LETTER VARCA;Lo;0;L;;;;;N;;;;; +11BD4;SUNUWAR LETTER YAT;Lo;0;L;;;;;N;;;;; +11BD5;SUNUWAR LETTER AVA;Lo;0;L;;;;;N;;;;; +11BD6;SUNUWAR LETTER AAL;Lo;0;L;;;;;N;;;;; +11BD7;SUNUWAR LETTER DONGA;Lo;0;L;;;;;N;;;;; +11BD8;SUNUWAR LETTER THARI;Lo;0;L;;;;;N;;;;; +11BD9;SUNUWAR LETTER PHAR;Lo;0;L;;;;;N;;;;; +11BDA;SUNUWAR LETTER NGAR;Lo;0;L;;;;;N;;;;; +11BDB;SUNUWAR LETTER KHA;Lo;0;L;;;;;N;;;;; +11BDC;SUNUWAR LETTER SHYER;Lo;0;L;;;;;N;;;;; +11BDD;SUNUWAR LETTER CHELAP;Lo;0;L;;;;;N;;;;; +11BDE;SUNUWAR LETTER TENTU;Lo;0;L;;;;;N;;;;; +11BDF;SUNUWAR LETTER THELE;Lo;0;L;;;;;N;;;;; +11BE0;SUNUWAR LETTER KLOKO;Lo;0;L;;;;;N;;;;; +11BE1;SUNUWAR SIGN PVO;Po;0;L;;;;;N;;;;; +11BF0;SUNUWAR DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;; +11BF1;SUNUWAR DIGIT ONE;Nd;0;L;;1;1;1;N;;;;; +11BF2;SUNUWAR DIGIT TWO;Nd;0;L;;2;2;2;N;;;;; +11BF3;SUNUWAR DIGIT THREE;Nd;0;L;;3;3;3;N;;;;; +11BF4;SUNUWAR DIGIT FOUR;Nd;0;L;;4;4;4;N;;;;; +11BF5;SUNUWAR DIGIT FIVE;Nd;0;L;;5;5;5;N;;;;; +11BF6;SUNUWAR DIGIT 
SIX;Nd;0;L;;6;6;6;N;;;;; +11BF7;SUNUWAR DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;; +11BF8;SUNUWAR DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;; +11BF9;SUNUWAR DIGIT NINE;Nd;0;L;;9;9;9;N;;;;; 11C00;BHAIKSUKI LETTER A;Lo;0;L;;;;;N;;;;; 11C01;BHAIKSUKI LETTER AA;Lo;0;L;;;;;N;;;;; 11C02;BHAIKSUKI LETTER I;Lo;0;L;;;;;N;;;;; @@ -27981,6 +28175,10 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1BCA1;SHORTHAND FORMAT CONTINUING OVERLAP;Cf;0;BN;;;;;N;;;;; 1BCA2;SHORTHAND FORMAT DOWN STEP;Cf;0;BN;;;;;N;;;;; 1BCA3;SHORTHAND FORMAT UP STEP;Cf;0;BN;;;;;N;;;;; +1CEB0;HORIZONTAL ZIGZAG LINE;So;0;ON;;;;;N;;;;; +1CEB1;KEYHOLE;So;0;ON;;;;;N;;;;; +1CEB2;OLD PERSONAL COMPUTER WITH MONITOR IN PORTRAIT ORIENTATION;So;0;ON;;;;;N;;;;; +1CEB3;BLACK RIGHT TRIANGLE CARET;So;0;ON;;;;;N;;;;; 1CF00;ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT;Mn;0;NSM;;;;;N;;;;; 1CF01;ZNAMENNY COMBINING MARK NIZKO S KRYZHEM ON LEFT;Mn;0;NSM;;;;;N;;;;; 1CF02;ZNAMENNY COMBINING MARK TSATA ON LEFT;Mn;0;NSM;;;;;N;;;;; @@ -33358,6 +33556,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1F8AD;WHITE ARROW SHAFT WIDTH TWO THIRDS;So;0;ON;;;;;N;;;;; 1F8B0;ARROW POINTING UPWARDS THEN NORTH WEST;So;0;ON;;;;;N;;;;; 1F8B1;ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST;So;0;ON;;;;;N;;;;; +1F8B2;RIGHTWARDS ARROW WITH LOWER HOOK;So;0;ON;;;;;N;;;;; 1F900;CIRCLED CROSS FORMEE WITH FOUR DOTS;So;0;ON;;;;;N;;;;; 1F901;CIRCLED CROSS FORMEE WITH TWO DOTS;So;0;ON;;;;;N;;;;; 1F902;CIRCLED CROSS FORMEE;So;0;ON;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 9c73a6a1f..af370cf10 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2023-10-03, 12:05:52 GMT +# Date: 2023-10-24, 09:20:02 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use, see https://www.unicode.org/terms_of_use.html @@ -268,7 +268,7 @@ 0888 ; R # Sk ARABIC RAISED ROUND DOT 0889..088E ; R # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL 0890..0891 ; R # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE -0898..089F ; R # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +0897..089F ; R # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; R # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; R # Lm ARABIC SMALL FARSI YEH 08CA..08E1 ; R # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA @@ -436,7 +436,7 @@ 0C4A..0C4D ; R # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; R # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; R # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; R # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; R # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; R # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; R # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; R # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -462,7 +462,7 @@ 0CCA..0CCB ; R # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; R # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; R # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; R # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; R # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; R # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; R # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; R # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE @@ -793,7 +793,7 @@ 1C5A..1C77 ; R # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; R # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1C7E..1C7F ; R # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION 
DOUBLE MUCAAD -1C80..1C88 ; R # Ll [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C8A ; R # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE 1C90..1CBA ; R # Lu [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; R # Lu [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1CC0..1CC7 ; R # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA @@ -1694,6 +1694,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 105A3..105B1 ; R # Ll [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE 105B3..105B9 ; R # Ll [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; R # Ll [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; R # Lo [52] TODHRI LETTER A..TODHRI LETTER OO 10600..10736 ; R # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; R # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; R # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 @@ -1766,12 +1767,23 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 10D00..10D23 ; R # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA 10D24..10D27 ; R # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D30..10D39 ; R # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10D40..10D49 ; R # Nd [10] GARAY DIGIT ZERO..GARAY DIGIT NINE +10D4A..10D4D ; R # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; R # Lm GARAY VOWEL LENGTH MARK +10D4F ; R # Lo GARAY SUKUN +10D50..10D65 ; R # Lu [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D69..10D6D ; R # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK +10D6E ; R # Pd GARAY HYPHEN +10D6F ; R # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; R # Ll [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA +10D8E..10D8F ; R # Sm [2] GARAY PLUS SIGN..GARAY MINUS 
SIGN 10E60..10E7E ; R # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS 10E80..10EA9 ; R # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EAB..10EAC ; R # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EAD ; R # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; R # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE -10EFD..10EFF ; R # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10EC2..10EC4 ; R # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EFC..10EFF ; R # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; R # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; R # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; R # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -1949,6 +1961,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 116B8 ; R # Lo TAKRI LETTER ARCHAIC KHA 116B9 ; R # Po TAKRI ABBREVIATION SIGN 116C0..116C9 ; R # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +116D0..116E3 ; R # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11700..1171A ; R # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA 1171D..1171F ; R # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; R # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA @@ -2023,6 +2036,9 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 11AB0..11ABF ; U # Lo [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA 11AC0..11AF8 ; R # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL 11B00..11B09 ; R # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11BC0..11BE0 ; R # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO +11BE1 ; R # Po SUNUWAR SIGN PVO +11BF0..11BF9 ; R # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 11C00..11C08 ; R # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER 
VOCALIC L 11C0A..11C2E ; R # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA 11C2F ; R # Mc BHAIKSUKI VOWEL SIGN AA @@ -2172,6 +2188,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 1BC9D..1BC9E ; R # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK 1BC9F ; R # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1BCA0..1BCA3 ; R # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP +1CEB0..1CEB3 ; R # So [4] HORIZONTAL ZIGZAG LINE..BLACK RIGHT TRIANGLE CARET 1CF00..1CF2D ; U # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF2E..1CF2F ; U # Cn [2] .. 1CF30..1CF46 ; U # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG @@ -2393,7 +2410,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 1F850..1F859 ; R # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW 1F860..1F887 ; R # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW 1F890..1F8AD ; R # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS -1F8B0..1F8B1 ; R # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST +1F8B0..1F8B2 ; R # So [3] ARROW POINTING UPWARDS THEN NORTH WEST..RIGHTWARDS ARROW WITH LOWER HOOK 1F900..1F9FF ; U # So [256] CIRCLED CROSS FORMEE WITH FOUR DOTS..NAZAR AMULET 1FA00..1FA53 ; U # So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP 1FA54..1FA5F ; U # Cn [12] .. 
diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt index 12453cbdb..91bb0bd24 100644 --- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt @@ -1,5 +1,5 @@ -# GraphemeBreakProperty-15.1.0.txt -# Date: 2023-01-05, 20:34:41 GMT +# GraphemeBreakProperty-16.0.0.txt +# Date: 2023-10-23, 18:45:01 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -106,7 +106,7 @@ E01F0..E0FFF ; Control # Cn [3600] .. 0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK -0898..089F ; Extend # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +0897..089F ; Extend # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08CA..08E1 ; Extend # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA 08E3..0902 ; Extend # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA 093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE @@ -324,8 +324,9 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 10A3F ; Extend # Mn KHAROSHTHI VIRAMA 10AE5..10AE6 ; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D69..10D6D ; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFD..10EFF ; Extend # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10EFC..10EFF ; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC 
SMALL LOW WORD MADDA 10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; Extend # Mn BRAHMI SIGN ANUSVARA @@ -459,7 +460,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2130 +# Total code points: 2137 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index a9ae2e260..e5d964f9f 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-16.0.0.txt -# Date: 2023-10-03, 12:05:49 GMT +# Date: 2023-10-24, 09:20:00 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use, see https://www.unicode.org/terms_of_use.html @@ -55,7 +55,7 @@ 0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK -0898..089F ; Extend # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +0897..089F ; Extend # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08CA..08E1 ; Extend # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA 08E3..0902 ; Extend # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA 0903 ; Extend # Mc DEVANAGARI SIGN VISARGA @@ -371,8 +371,9 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 10A3F ; Extend # Mn KHAROSHTHI VIRAMA 10AE5..10AE6 ; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D69..10D6D ; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFD..10EFF ; Extend # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10EFC..10EFF ; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11000 ; Extend # Mc BRAHMI SIGN CANDRABINDU @@ -567,7 +568,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2550 +# Total code points: 2557 # ================================================ @@ -892,6 +893,7 @@ E0001 ; Format # Cf LANGUAGE 
TAG 10FC ; Lower # Lm MODIFIER LETTER GEORGIAN NAR 13F8..13FD ; Lower # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV 1C80..1C88 ; Lower # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C8A ; Lower # L& CYRILLIC SMALL LETTER TJE 1D00..1D2B ; Lower # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL 1D2C..1D6A ; Lower # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI 1D6B..1D77 ; Lower # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G @@ -1261,6 +1263,7 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 10787..107B0 ; Lower # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK 107B2..107BA ; Lower # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL 10CC0..10CF2 ; Lower # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D70..10D85 ; Lower # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118C0..118DF ; Lower # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E60..16E7F ; Lower # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1D41A..1D433 ; Lower # L& [26] MATHEMATICAL BOLD SMALL A..MATHEMATICAL BOLD SMALL Z @@ -1297,7 +1300,7 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1E030..1E06D ; Lower # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lower # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2497 +# Total code points: 2520 # ================================================ @@ -1577,6 +1580,7 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 10C7 ; Upper # L& GEORGIAN CAPITAL LETTER YN 10CD ; Upper # L& GEORGIAN CAPITAL LETTER AEN 13A0..13F5 ; Upper # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +1C89 ; Upper # L& CYRILLIC CAPITAL LETTER TJE 1E00 ; Upper # L& LATIN 
CAPITAL LETTER A WITH RING BELOW 1E02 ; Upper # L& LATIN CAPITAL LETTER B WITH DOT ABOVE 1E04 ; Upper # L& LATIN CAPITAL LETTER B WITH DOT BELOW @@ -1917,6 +1921,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 1058C..10592 ; Upper # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE 10594..10595 ; Upper # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE 10C80..10CB2 ; Upper # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10D50..10D65 ; Upper # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA 118A0..118BF ; Upper # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO 16E40..16E5F ; Upper # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y 1D400..1D419 ; Upper # L& [26] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL BOLD CAPITAL Z @@ -1955,7 +1960,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 1F150..1F169 ; Upper # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Upper # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1937 +# Total code points: 1960 # ================================================ @@ -2063,7 +2068,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT 0C2A..0C39 ; OLetter # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; OLetter # Lo TELUGU SIGN AVAGRAHA 0C58..0C5A ; OLetter # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; OLetter # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; OLetter # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; OLetter # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C80 ; OLetter # Lo KANNADA SIGN SPACING CANDRABINDU 0C85..0C8C ; OLetter # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L @@ -2072,7 +2077,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH 
LAT 0CAA..0CB3 ; OLetter # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; OLetter # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; OLetter # Lo KANNADA SIGN AVAGRAHA -0CDD..0CDE ; OLetter # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; OLetter # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; OLetter # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CF1..0CF2 ; OLetter # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D04..0D0C ; OLetter # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -2322,6 +2327,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10450..1049D ; OLetter # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO 10500..10527 ; OLetter # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE 10530..10563 ; OLetter # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW +105C0..105F3 ; OLetter # Lo [52] TODHRI LETTER A..TODHRI LETTER OO 10600..10736 ; OLetter # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; OLetter # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; OLetter # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 @@ -2354,8 +2360,13 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10B80..10B91 ; OLetter # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW 10C00..10C48 ; OLetter # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH 10D00..10D23 ; OLetter # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D4A..10D4D ; OLetter # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; OLetter # Lm GARAY VOWEL LENGTH MARK +10D4F ; OLetter # Lo GARAY SUKUN +10D6F ; OLetter # Lm GARAY REDUPLICATION MARK 10E80..10EA9 ; OLetter # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; OLetter # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EC2..10EC4 ; OLetter # Lo [3] ARABIC 
LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10F00..10F1C ; OLetter # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; OLetter # Lo OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; OLetter # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -2427,6 +2438,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 11A5C..11A89 ; OLetter # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA 11A9D ; OLetter # Lo SOYOMBO MARK PLUTA 11AB0..11AF8 ; OLetter # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11BC0..11BE0 ; OLetter # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11C00..11C08 ; OLetter # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E ; OLetter # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA 11C40 ; OLetter # Lo BHAIKSUKI SIGN AVAGRAHA @@ -2537,7 +2549,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 132658 +# Total code points: 132755 # ================================================ @@ -2585,6 +2597,7 @@ ABF0..ABF9 ; Numeric # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT N FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 104A0..104A9 ; Numeric # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE 10D30..10D39 ; Numeric # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10D40..10D49 ; Numeric # Nd [10] GARAY DIGIT ZERO..GARAY DIGIT NINE 11066..1106F ; Numeric # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE 110BD ; Numeric # Cf KAITHI NUMBER SIGN 110CD ; Numeric # Cf KAITHI NUMBER SIGN ABOVE @@ -2596,9 +2609,11 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 114D0..114D9 ; Numeric # Nd 
[10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE 11650..11659 ; Numeric # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE 116C0..116C9 ; Numeric # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +116D0..116E3 ; Numeric # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11730..11739 ; Numeric # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE 118E0..118E9 ; Numeric # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE 11950..11959 ; Numeric # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE +11BF0..11BF9 ; Numeric # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 11C50..11C59 ; Numeric # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE 11D50..11D59 ; Numeric # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; Numeric # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE @@ -2613,7 +2628,7 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 1E950..1E959 ; Numeric # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; Numeric # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 694 +# Total code points: 734 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 44e1ed75c..39d0db094 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-16.0.0.txt -# Date: 2023-10-03, 12:05:53 GMT +# Date: 2023-10-24, 09:20:02 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use, see https://www.unicode.org/terms_of_use.html @@ -91,7 +91,7 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW 0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK -0898..089F ; Extend # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +0897..089F ; Extend # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08CA..08E1 ; Extend # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA 08E3..0902 ; Extend # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA 0903 ; Extend # Mc DEVANAGARI SIGN VISARGA @@ -407,8 +407,9 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 10A3F ; Extend # Mn KHAROSHTHI VIRAMA 10AE5..10AE6 ; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D69..10D6D ; Extend # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFD..10EFF ; Extend # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10EFC..10EFF ; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11000 ; Extend # Mc BRAHMI SIGN CANDRABINDU @@ -604,7 +605,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2554 +# Total code points: 2561 # 
================================================ @@ -794,7 +795,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 0C2A..0C39 ; ALetter # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; ALetter # Lo TELUGU SIGN AVAGRAHA 0C58..0C5A ; ALetter # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; ALetter # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; ALetter # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; ALetter # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C80 ; ALetter # Lo KANNADA SIGN SPACING CANDRABINDU 0C85..0C8C ; ALetter # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L @@ -803,7 +804,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 0CAA..0CB3 ; ALetter # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; ALetter # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; ALetter # Lo KANNADA SIGN AVAGRAHA -0CDD..0CDE ; ALetter # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; ALetter # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; ALetter # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CF1..0CF2 ; ALetter # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D04..0D0C ; ALetter # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -877,7 +878,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK 1C4D..1C4F ; ALetter # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C5A..1C77 ; ALetter # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; ALetter # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD -1C80..1C88 ; ALetter # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C8A ; ALetter # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE 1C90..1CBA ; ALetter # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; ALetter # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER 
AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1CE9..1CEC ; ALetter # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL @@ -1077,6 +1078,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 105A3..105B1 ; ALetter # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE 105B3..105B9 ; ALetter # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; ALetter # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; ALetter # Lo [52] TODHRI LETTER A..TODHRI LETTER OO 10600..10736 ; ALetter # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; ALetter # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; ALetter # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 @@ -1113,8 +1115,15 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 10C80..10CB2 ; ALetter # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US 10CC0..10CF2 ; ALetter # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US 10D00..10D23 ; ALetter # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D4A..10D4D ; ALetter # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; ALetter # Lm GARAY VOWEL LENGTH MARK +10D4F ; ALetter # Lo GARAY SUKUN +10D50..10D65 ; ALetter # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D6F ; ALetter # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; ALetter # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 10E80..10EA9 ; ALetter # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; ALetter # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EC2..10EC4 ; ALetter # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10F00..10F1C ; ALetter # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; ALetter # Lo OLD SOGDIAN LIGATURE 
AYIN-DALETH 10F30..10F45 ; ALetter # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -1185,6 +1194,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 11A5C..11A89 ; ALetter # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA 11A9D ; ALetter # Lo SOYOMBO MARK PLUTA 11AB0..11AF8 ; ALetter # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11BC0..11BE0 ; ALetter # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11C00..11C08 ; ALetter # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E ; ALetter # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA 11C40 ; ALetter # Lo BHAIKSUKI SIGN AVAGRAHA @@ -1313,7 +1323,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 29491 +# Total code points: 29634 # ================================================ @@ -1406,6 +1416,7 @@ ABF0..ABF9 ; Numeric # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT N FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 104A0..104A9 ; Numeric # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE 10D30..10D39 ; Numeric # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10D40..10D49 ; Numeric # Nd [10] GARAY DIGIT ZERO..GARAY DIGIT NINE 11066..1106F ; Numeric # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE 110BD ; Numeric # Cf KAITHI NUMBER SIGN 110CD ; Numeric # Cf KAITHI NUMBER SIGN ABOVE @@ -1417,9 +1428,11 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 114D0..114D9 ; Numeric # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE 11650..11659 ; Numeric # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE 116C0..116C9 ; Numeric # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +116D0..116E3 ; 
Numeric # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11730..11739 ; Numeric # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE 118E0..118E9 ; Numeric # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE 11950..11959 ; Numeric # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE +11BF0..11BF9 ; Numeric # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 11C50..11C59 ; Numeric # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE 11D50..11D59 ; Numeric # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; Numeric # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE @@ -1434,7 +1447,7 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 1E950..1E959 ; Numeric # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; Numeric # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 693 +# Total code points: 733 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 5b720a294..939cd3b45 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-16.0.0.txt -# Date: 2023-10-03, 12:05:23 GMT +# Date: 2023-10-24, 09:19:34 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use, see https://www.unicode.org/terms_of_use.html @@ -84,6 +84,7 @@ # 10D00..10D3F Hanifi_Rohingya # @missing: 10D00..10D3F; Arabic_Letter +# 10D40..10D8F Garay # 10E60..10E7F Rumi_Numeral_Symbols # 10E80..10EBF Yezidi # @missing: 10D40..10EBF; Right_To_Left @@ -272,7 +273,7 @@ 0C3D ; L # Lo TELUGU SIGN AVAGRAHA 0C41..0C44 ; L # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR 0C58..0C5A ; L # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; L # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; L # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; L # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C66..0C6F ; L # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE 0C77 ; L # Po TELUGU SIGN SIDDHAM @@ -293,7 +294,7 @@ 0CC7..0CC8 ; L # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI 0CCA..0CCB ; L # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CD5..0CD6 ; L # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; L # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; L # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; L # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE6..0CEF ; L # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE 0CF1..0CF2 ; L # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA @@ -517,7 +518,7 @@ 1C5A..1C77 ; L # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; L # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1C7E..1C7F ; L # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD -1C80..1C88 ; L # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C8A ; L # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE 1C90..1CBA ; L # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; L # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1CC0..1CC7 ; L # Po [8] SUNDANESE PUNCTUATION 
BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA @@ -821,6 +822,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 105A3..105B1 ; L # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE 105B3..105B9 ; L # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; L # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; L # Lo [52] TODHRI LETTER A..TODHRI LETTER OO 10600..10736 ; L # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; L # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; L # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 @@ -939,6 +941,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 116B8 ; L # Lo TAKRI LETTER ARCHAIC KHA 116B9 ; L # Po TAKRI ABBREVIATION SIGN 116C0..116C9 ; L # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +116D0..116E3 ; L # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11700..1171A ; L # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA 11720..11721 ; L # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA 11726 ; L # Mc AHOM VOWEL SIGN E @@ -991,6 +994,9 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 11A9E..11AA2 ; L # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 11AB0..11AF8 ; L # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL 11B00..11B09 ; L # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11BC0..11BE0 ; L # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO +11BE1 ; L # Po SUNUWAR SIGN PVO +11BF0..11BF9 ; L # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 11C00..11C08 ; L # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E ; L # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA 11C2F ; L # Mc BHAIKSUKI VOWEL SIGN AA @@ -1182,8 +1188,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 
100000..10FFFD; L # Co [65534] .. -# The above property value applies to 820461 code points not listed here. -# Total code points: 1096267 +# The above property value applies to 820336 code points not listed here. +# Total code points: 1096262 # ================================================ @@ -1270,6 +1276,13 @@ FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE AL 10C80..10CB2 ; R # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US 10CC0..10CF2 ; R # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US 10CFA..10CFF ; R # No [6] OLD HUNGARIAN NUMBER ONE..OLD HUNGARIAN NUMBER ONE THOUSAND +10D4A..10D4D ; R # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; R # Lm GARAY VOWEL LENGTH MARK +10D4F ; R # Lo GARAY SUKUN +10D50..10D65 ; R # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D6F ; R # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; R # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA +10D8E..10D8F ; R # Sm [2] GARAY PLUS SIGN..GARAY MINUS SIGN 10E80..10EA9 ; R # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EAD ; R # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; R # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE @@ -1288,8 +1301,8 @@ FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE AL 1E950..1E959 ; R # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1E95E..1E95F ; R # Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK -# The above property value applies to 2156 code points not listed here. -# Total code points: 3647 +# The above property value applies to 2087 code points not listed here. 
+# Total code points: 3631 # ================================================ @@ -1378,9 +1391,10 @@ FFE5..FFE6 ; ET # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN 0890..0891 ; AN # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE 08E2 ; AN # Cf ARABIC DISPUTED END OF AYAH 10D30..10D39 ; AN # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10D40..10D49 ; AN # Nd [10] GARAY DIGIT ZERO..GARAY DIGIT NINE 10E60..10E7E ; AN # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS -# Total code points: 63 +# Total code points: 73 # ================================================ @@ -1894,11 +1908,13 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 101A0 ; ON # So GREEK SYMBOL TAU RHO 1091F ; ON # Po PHOENICIAN WORD SEPARATOR 10B39..10B3F ; ON # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION +10D6E ; ON # Pd GARAY HYPHEN 11052..11065 ; ON # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND 11660..1166C ; ON # Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT 11FD5..11FDC ; ON # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI 11FE1..11FF1 ; ON # So [17] TAMIL SIGN PAARAM..TAMIL SIGN VAKAIYARAA 16FE2 ; ON # Po OLD CHINESE HOOK MARK +1CEB0..1CEB3 ; ON # So [4] HORIZONTAL ZIGZAG LINE..BLACK RIGHT TRIANGLE CARET 1D1E9..1D1EA ; ON # So [2] MUSICAL SYMBOL SORI..MUSICAL SYMBOL KORON 1D200..1D241 ; ON # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 1D245 ; ON # So GREEK MUSICAL LEIMMA @@ -1935,7 +1951,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1F850..1F859 ; ON # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW 1F860..1F887 ; ON # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW 1F890..1F8AD ; ON # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS -1F8B0..1F8B1 ; ON # So [2] ARROW POINTING UPWARDS THEN NORTH 
WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST +1F8B0..1F8B2 ; ON # So [3] ARROW POINTING UPWARDS THEN NORTH WEST..RIGHTWARDS ARROW WITH LOWER HOOK 1F900..1FA53 ; ON # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP 1FA60..1FA6D ; ON # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; ON # So [13] BALLET SHOES..CRUTCH @@ -1948,7 +1964,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE 1FB00..1FB92 ; ON # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBCA ; ON # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON -# Total code points: 6034 +# Total code points: 6040 # ================================================ @@ -2023,7 +2039,7 @@ FFFFE..FFFFF ; BN # Cn [2] .. 0825..0827 ; NSM # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; NSM # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; NSM # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK -0898..089F ; NSM # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +0897..089F ; NSM # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08CA..08E1 ; NSM # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA 08E3..0902 ; NSM # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA 093A ; NSM # Mn DEVANAGARI VOWEL SIGN OE @@ -2223,8 +2239,9 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 10A3F ; NSM # Mn KHAROSHTHI VIRAMA 10AE5..10AE6 ; NSM # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 10D24..10D27 ; NSM # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D69..10D6D ; NSM # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; NSM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFD..10EFF ; NSM # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA 
+10EFC..10EFF ; NSM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; NSM # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; NSM # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; NSM # Mn BRAHMI SIGN ANUSVARA @@ -2348,7 +2365,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC 1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1993 +# Total code points: 2000 # ================================================ @@ -2395,6 +2412,7 @@ FDFC ; AL # Sc RIAL SIGN FE70..FE74 ; AL # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM 10D00..10D23 ; AL # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10F30..10F45 ; AL # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN 10F51..10F54 ; AL # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED 10F55..10F59 ; AL # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT @@ -2440,8 +2458,8 @@ FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WI 1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH 1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN -# The above property value applies to 298 code points not listed here. -# Total code points: 1769 +# The above property value applies to 293 code points not listed here. 
+# Total code points: 1767 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 6f06eeac4..3615f4d72 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-16.0.0.txt -# Date: 2023-10-03, 12:05:24 GMT +# Date: 2023-10-24, 09:19:36 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -335,7 +335,7 @@ 0C46..0C48 ; 0 # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI 0C4A..0C4C ; 0 # Mn [3] TELUGU VOWEL SIGN O..TELUGU VOWEL SIGN AU 0C58..0C5A ; 0 # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; 0 # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; 0 # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; 0 # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; 0 # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; 0 # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -360,7 +360,7 @@ 0CCA..0CCB ; 0 # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC ; 0 # Mn KANNADA VOWEL SIGN AU 0CD5..0CD6 ; 0 # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; 0 # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; 0 # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; 0 # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; 0 # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; 0 # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE @@ -664,7 +664,7 @@ 1C5A..1C77 ; 0 # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; 0 # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1C7E..1C7F ; 0 # Po [2] OL CHIKI PUNCTUATION 
MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD -1C80..1C88 ; 0 # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C8A ; 0 # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE 1C90..1CBA ; 0 # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; 0 # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1CC0..1CC7 ; 0 # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA @@ -1392,6 +1392,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 105A3..105B1 ; 0 # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE 105B3..105B9 ; 0 # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; 0 # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; 0 # Lo [52] TODHRI LETTER A..TODHRI LETTER OO 10600..10736 ; 0 # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; 0 # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; 0 # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 @@ -1459,10 +1460,21 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 10CFA..10CFF ; 0 # No [6] OLD HUNGARIAN NUMBER ONE..OLD HUNGARIAN NUMBER ONE THOUSAND 10D00..10D23 ; 0 # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA 10D30..10D39 ; 0 # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10D40..10D49 ; 0 # Nd [10] GARAY DIGIT ZERO..GARAY DIGIT NINE +10D4A..10D4D ; 0 # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; 0 # Lm GARAY VOWEL LENGTH MARK +10D4F ; 0 # Lo GARAY SUKUN +10D50..10D65 ; 0 # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D6E ; 0 # Pd GARAY HYPHEN +10D6F ; 0 # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; 0 # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA +10D8E..10D8F ; 0 # Sm [2] GARAY PLUS SIGN..GARAY MINUS SIGN 10E60..10E7E ; 0 # No [31] RUMI DIGIT 
ONE..RUMI FRACTION TWO THIRDS 10E80..10EA9 ; 0 # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EAD ; 0 # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; 0 # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EC2..10EC4 ; 0 # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EFC ; 0 # Mn ARABIC COMBINING ALEF OVERLAY 10F00..10F1C ; 0 # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; 0 # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; 0 # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -1622,6 +1634,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 116B8 ; 0 # Lo TAKRI LETTER ARCHAIC KHA 116B9 ; 0 # Po TAKRI ABBREVIATION SIGN 116C0..116C9 ; 0 # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +116D0..116E3 ; 0 # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11700..1171A ; 0 # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA 1171D..1171F ; 0 # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; 0 # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA @@ -1687,6 +1700,9 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 11A9E..11AA2 ; 0 # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 11AB0..11AF8 ; 0 # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL 11B00..11B09 ; 0 # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11BC0..11BE0 ; 0 # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO +11BE1 ; 0 # Po SUNUWAR SIGN PVO +11BF0..11BF9 ; 0 # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 11C00..11C08 ; 0 # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E ; 0 # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA 11C2F ; 0 # Mc BHAIKSUKI VOWEL SIGN AA @@ -1811,6 +1827,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT 
CHARACTER 1BC9D ; 0 # Mn DUPLOYAN THICK LETTER SELECTOR 1BC9F ; 0 # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1BCA0..1BCA3 ; 0 # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP +1CEB0..1CEB3 ; 0 # So [4] HORIZONTAL ZIGZAG LINE..BLACK RIGHT TRIANGLE CARET 1CF00..1CF2D ; 0 # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF30..1CF46 ; 0 # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG 1CF50..1CFC3 ; 0 # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK @@ -1977,7 +1994,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1F850..1F859 ; 0 # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW 1F860..1F887 ; 0 # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW 1F890..1F8AD ; 0 # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS -1F8B0..1F8B1 ; 0 # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST +1F8B0..1F8B2 ; 0 # So [3] ARROW POINTING UPWARDS THEN NORTH WEST..RIGHTWARDS ARROW WITH LOWER HOOK 1F900..1FA53 ; 0 # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP 1FA60..1FA6D ; 0 # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; 0 # So [13] BALLET SHOES..CRUTCH @@ -2005,8 +2022,8 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 826765 code points not listed here. -# Total code points: 1113190 +# The above property value applies to 826566 code points not listed here. 
+# Total code points: 1113184 # ================================================ @@ -2652,7 +2669,7 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON 081B..0823 ; 230 # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A 0825..0827 ; 230 # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; 230 # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA -0898 ; 230 # Mn ARABIC SMALL HIGH WORD AL-JUZ +0897..0898 ; 230 # Mn [2] ARABIC PEPET..ARABIC SMALL HIGH WORD AL-JUZ 089C..089F ; 230 # Mn [4] ARABIC MADDA WAAJIB..ARABIC HALF MADDA OVER MADDA 08CA..08CE ; 230 # Mn [5] ARABIC SMALL HIGH FARSI YEH..ARABIC LARGE ROUND DOT ABOVE 08D4..08E1 ; 230 # Mn [14] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH SIGN SAFHA @@ -2716,6 +2733,7 @@ FE2E..FE2F ; 230 # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYR 10A38 ; 230 # Mn KHAROSHTHI SIGN BAR ABOVE 10AE5 ; 230 # Mn MANICHAEAN ABBREVIATION MARK ABOVE 10D24..10D27 ; 230 # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D69..10D6D ; 230 # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; 230 # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10F48..10F4A ; 230 # Mn [3] SOGDIAN COMBINING DOT ABOVE..SOGDIAN COMBINING CURVE ABOVE 10F4C ; 230 # Mn SOGDIAN COMBINING HOOK ABOVE @@ -2741,7 +2759,7 @@ FE2E..FE2F ; 230 # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYR 1E4EF ; 230 # Mn NAG MUNDARI SIGN SUTUH 1E944..1E949 ; 230 # Mn [6] ADLAM ALIF LENGTHENER..ADLAM GEMINATE CONSONANT MODIFIER -# Total code points: 510 +# Total code points: 516 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt b/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt index 6bd9e8ee6..b1789d11f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt +++ 
b/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt @@ -1,5 +1,5 @@ -# DerivedDecompositionType-15.1.0.txt -# Date: 2023-01-05, 20:34:36 GMT +# DerivedDecompositionType-16.0.0.txt +# Date: 2023-10-16, 14:22:23 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -249,6 +249,8 @@ FB3E ; Canonical # Lo HEBREW LETTER MEM WITH DAGESH FB40..FB41 ; Canonical # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH FB43..FB44 ; Canonical # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH FB46..FB4E ; Canonical # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LETTER PE WITH RAFE +105C9 ; Canonical # Lo TODHRI LETTER EI +105E4 ; Canonical # Lo TODHRI LETTER U 1109A ; Canonical # Lo KAITHI LETTER DDDHA 1109C ; Canonical # Lo KAITHI LETTER RHA 110AB ; Canonical # Lo KAITHI LETTER VA @@ -262,7 +264,7 @@ FB46..FB4E ; Canonical # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET 1D1BB..1D1C0 ; Canonical # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK 2F800..2FA1D ; Canonical # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 13233 +# Total code points: 13235 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index 70c4ab953..2193ed51b 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2023-10-03, 12:05:26 GMT +# Date: 2023-10-24, 09:19:38 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use, see https://www.unicode.org/terms_of_use.html @@ -225,7 +225,7 @@ 0888 ; N # Sk ARABIC RAISED ROUND DOT 0889..088E ; N # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL 0890..0891 ; N # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE -0898..089F ; N # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +0897..089F ; N # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08A0..08C8 ; N # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF 08C9 ; N # Lm ARABIC SMALL FARSI YEH 08CA..08E1 ; N # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA @@ -391,7 +391,7 @@ 0C4A..0C4D ; N # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA 0C55..0C56 ; N # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK 0C58..0C5A ; N # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; N # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; N # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; N # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C62..0C63 ; N # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL 0C66..0C6F ; N # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE @@ -417,7 +417,7 @@ 0CCA..0CCB ; N # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO 0CCC..0CCD ; N # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA 0CD5..0CD6 ; N # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CDD..0CDE ; N # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; N # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; N # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CE2..0CE3 ; N # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL 0CE6..0CEF ; N # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE @@ -743,7 +743,7 @@ 1C5A..1C77 ; N # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; N # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD 1C7E..1C7F ; N # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION 
DOUBLE MUCAAD -1C80..1C88 ; N # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C8A ; N # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE 1C90..1CBA ; N # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; N # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1CC0..1CC7 ; N # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA @@ -1397,6 +1397,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 105A3..105B1 ; N # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE 105B3..105B9 ; N # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; N # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; N # Lo [52] TODHRI LETTER A..TODHRI LETTER OO 10600..10736 ; N # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; N # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; N # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 @@ -1467,12 +1468,23 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 10D00..10D23 ; N # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA 10D24..10D27 ; N # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI 10D30..10D39 ; N # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10D40..10D49 ; N # Nd [10] GARAY DIGIT ZERO..GARAY DIGIT NINE +10D4A..10D4D ; N # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; N # Lm GARAY VOWEL LENGTH MARK +10D4F ; N # Lo GARAY SUKUN +10D50..10D65 ; N # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D69..10D6D ; N # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK +10D6E ; N # Pd GARAY HYPHEN +10D6F ; N # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; N # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA +10D8E..10D8F ; N # Sm [2] GARAY PLUS SIGN..GARAY MINUS SIGN 10E60..10E7E ; N # No [31] RUMI DIGIT ONE..RUMI FRACTION 
TWO THIRDS 10E80..10EA9 ; N # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EAB..10EAC ; N # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK 10EAD ; N # Pd YEZIDI HYPHENATION MARK 10EB0..10EB1 ; N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE -10EFD..10EFF ; N # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10EC2..10EC4 ; N # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EFC..10EFF ; N # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; N # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; N # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -1647,6 +1659,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 116B8 ; N # Lo TAKRI LETTER ARCHAIC KHA 116B9 ; N # Po TAKRI ABBREVIATION SIGN 116C0..116C9 ; N # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +116D0..116E3 ; N # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11700..1171A ; N # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA 1171D..1171F ; N # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA 11720..11721 ; N # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA @@ -1717,6 +1730,9 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 11A9E..11AA2 ; N # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 11AB0..11AF8 ; N # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL 11B00..11B09 ; N # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11BC0..11BE0 ; N # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO +11BE1 ; N # Po SUNUWAR SIGN PVO +11BF0..11BF9 ; N # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 11C00..11C08 ; N # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E ; N # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA 11C2F ; N # 
Mc BHAIKSUKI VOWEL SIGN AA @@ -1830,6 +1846,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1BC9D..1BC9E ; N # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK 1BC9F ; N # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1BCA0..1BCA3 ; N # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP +1CEB0..1CEB3 ; N # So [4] HORIZONTAL ZIGZAG LINE..BLACK RIGHT TRIANGLE CARET 1CF00..1CF2D ; N # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT 1CF30..1CF46 ; N # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG 1CF50..1CFC3 ; N # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK @@ -2031,7 +2048,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1F850..1F859 ; N # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW 1F860..1F887 ; N # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW 1F890..1F8AD ; N # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS -1F8B0..1F8B1 ; N # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST +1F8B0..1F8B2 ; N # So [3] ARROW POINTING UPWARDS THEN NORTH WEST..RIGHTWARDS ARROW WITH LOWER HOOK 1F900..1F90B ; N # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT 1F93B ; N # So MODERN PENTATHLON 1F946 ; N # So RIFLE @@ -2043,7 +2060,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 766283 code points not listed here. +# The above property value applies to 766084 code points not listed here. 
# Total code points: 792618 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 0c5cfaf3e..56520cab8 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-16.0.0.txt -# Date: 2023-10-03, 12:05:26 GMT +# Date: 2023-10-24, 09:19:38 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -37,7 +37,7 @@ 085F ; Cn # 086B..086F ; Cn # [5] .. 088F ; Cn # -0892..0897 ; Cn # [6] .. +0892..0896 ; Cn # [5] .. 0984 ; Cn # 098D..098E ; Cn # [2] .. 0991..0992 ; Cn # [2] .. @@ -120,7 +120,7 @@ 0C49 ; Cn # 0C4E..0C54 ; Cn # [7] .. 0C57 ; Cn # -0C5B..0C5C ; Cn # [2] .. +0C5B ; Cn # 0C5E..0C5F ; Cn # [2] .. 0C64..0C65 ; Cn # [2] .. 0C70..0C76 ; Cn # [7] .. @@ -132,7 +132,7 @@ 0CC5 ; Cn # 0CC9 ; Cn # 0CCE..0CD4 ; Cn # [7] .. -0CD7..0CDC ; Cn # [6] .. +0CD7..0CDB ; Cn # [5] .. 0CDF ; Cn # 0CE4..0CE5 ; Cn # [2] .. 0CF0 ; Cn # @@ -234,7 +234,7 @@ 1BF4..1BFB ; Cn # [8] .. 1C38..1C3A ; Cn # [3] .. 1C4A..1C4C ; Cn # [3] .. -1C89..1C8F ; Cn # [7] .. +1C8B..1C8F ; Cn # [5] .. 1CBB..1CBC ; Cn # [2] .. 1CC8..1CCF ; Cn # [8] .. 1CFB..1CFF ; Cn # [5] .. @@ -388,7 +388,8 @@ FFFE..FFFF ; Cn # [2] .. 105A2 ; Cn # 105B2 ; Cn # 105BA ; Cn # -105BD..105FF ; Cn # [67] .. +105BD..105BF ; Cn # [3] .. +105F4..105FF ; Cn # [12] .. 10737..1073F ; Cn # [9] .. 10756..1075F ; Cn # [10] .. 10768..1077F ; Cn # [24] .. @@ -431,11 +432,15 @@ FFFE..FFFF ; Cn # [2] .. 10CB3..10CBF ; Cn # [13] .. 10CF3..10CF9 ; Cn # [7] .. 10D28..10D2F ; Cn # [8] .. -10D3A..10E5F ; Cn # [294] .. +10D3A..10D3F ; Cn # [6] .. +10D66..10D68 ; Cn # [3] .. +10D86..10D8D ; Cn # [8] .. +10D90..10E5F ; Cn # [208] .. 
10E7F ; Cn # 10EAA ; Cn # 10EAE..10EAF ; Cn # [2] .. -10EB2..10EFC ; Cn # [75] .. +10EB2..10EC1 ; Cn # [16] .. +10EC5..10EFB ; Cn # [55] .. 10F28..10F2F ; Cn # [8] .. 10F5A..10F6F ; Cn # [22] .. 10F8A..10FAF ; Cn # [38] .. @@ -486,7 +491,8 @@ FFFE..FFFF ; Cn # [2] .. 1165A..1165F ; Cn # [6] .. 1166D..1167F ; Cn # [19] .. 116BA..116BF ; Cn # [6] .. -116CA..116FF ; Cn # [54] .. +116CA..116CF ; Cn # [6] .. +116E4..116FF ; Cn # [28] .. 1171B..1171C ; Cn # [2] .. 1172C..1172F ; Cn # [4] .. 11747..117FF ; Cn # [185] .. @@ -506,7 +512,9 @@ FFFE..FFFF ; Cn # [2] .. 11A48..11A4F ; Cn # [8] .. 11AA3..11AAF ; Cn # [13] .. 11AF9..11AFF ; Cn # [7] .. -11B0A..11BFF ; Cn # [246] .. +11B0A..11BBF ; Cn # [182] .. +11BE2..11BEF ; Cn # [14] .. +11BFA..11BFF ; Cn # [6] .. 11C09 ; Cn # 11C37 ; Cn # 11C46..11C4F ; Cn # [10] .. @@ -574,7 +582,8 @@ FFFE..FFFF ; Cn # [2] .. 1BC7D..1BC7F ; Cn # [3] .. 1BC89..1BC8F ; Cn # [7] .. 1BC9A..1BC9B ; Cn # [2] .. -1BCA4..1CEFF ; Cn # [4700] .. +1BCA4..1CEAF ; Cn # [4620] .. +1CEB4..1CEFF ; Cn # [76] .. 1CF2E..1CF2F ; Cn # [2] .. 1CF47..1CF4F ; Cn # [9] .. 1CFC4..1CFFF ; Cn # [60] .. @@ -695,7 +704,7 @@ FFFE..FFFF ; Cn # [2] .. 1F85A..1F85F ; Cn # [6] .. 1F888..1F88F ; Cn # [8] .. 1F8AE..1F8AF ; Cn # [2] .. -1F8B2..1F8FF ; Cn # [78] .. +1F8B3..1F8FF ; Cn # [77] .. 1FA54..1FA5F ; Cn # [12] .. 1FA6E..1FA6F ; Cn # [2] .. 1FA7D..1FA7F ; Cn # [3] .. @@ -723,7 +732,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 824717 +# Total code points: 824518 # ================================================ @@ -1005,6 +1014,7 @@ FFFFE..FFFFF ; Cn # [2] .. 
10C7 ; Lu # GEORGIAN CAPITAL LETTER YN 10CD ; Lu # GEORGIAN CAPITAL LETTER AEN 13A0..13F5 ; Lu # [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +1C89 ; Lu # CYRILLIC CAPITAL LETTER TJE 1C90..1CBA ; Lu # [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; Lu # [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1E00 ; Lu # LATIN CAPITAL LETTER A WITH RING BELOW @@ -1342,6 +1352,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 1058C..10592 ; Lu # [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE 10594..10595 ; Lu # [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE 10C80..10CB2 ; Lu # [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10D50..10D65 ; Lu # [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA 118A0..118BF ; Lu # [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO 16E40..16E5F ; Lu # [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y 1D400..1D419 ; Lu # [26] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL BOLD CAPITAL Z @@ -1377,7 +1388,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 1D7CA ; Lu # MATHEMATICAL BOLD CAPITAL DIGAMMA 1E900..1E921 ; Lu # [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA -# Total code points: 1832 +# Total code points: 1855 # ================================================ @@ -1657,6 +1668,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP 10FD..10FF ; Ll # [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN 13F8..13FD ; Ll # [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV 1C80..1C88 ; Ll # [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C8A ; Ll # CYRILLIC SMALL LETTER TJE 1D00..1D2B ; Ll # [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL 1D6B..1D77 ; Ll # [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G 1D79..1D9A ; Ll # [34] 
LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK @@ -2007,6 +2019,7 @@ FF41..FF5A ; Ll # [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL 105B3..105B9 ; Ll # [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; Ll # [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE 10CC0..10CF2 ; Ll # [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D70..10D85 ; Ll # [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA 118C0..118DF ; Ll # [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO 16E60..16E7F ; Ll # [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y 1D41A..1D433 ; Ll # [26] MATHEMATICAL BOLD SMALL A..MATHEMATICAL BOLD SMALL Z @@ -2042,7 +2055,7 @@ FF41..FF5A ; Ll # [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL 1DF25..1DF2A ; Ll # [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E922..1E943 ; Ll # [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2233 +# Total code points: 2256 # ================================================ @@ -2125,6 +2138,8 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 10780..10785 ; Lm # [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK 10787..107B0 ; Lm # [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK 107B2..107BA ; Lm # [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10D4E ; Lm # GARAY VOWEL LENGTH MARK +10D6F ; Lm # GARAY REDUPLICATION MARK 16B40..16B43 ; Lm # [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM 16F93..16F9F ; Lm # [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 16FE0..16FE1 ; Lm # [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK @@ -2137,7 +2152,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 1E4EB ; Lm # NAG MUNDARI SIGN OJOD 1E94B ; Lm # ADLAM NASALIZATION MARK -# Total code points: 397 +# Total 
code points: 399 # ================================================ @@ -2233,7 +2248,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 0C2A..0C39 ; Lo # [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; Lo # TELUGU SIGN AVAGRAHA 0C58..0C5A ; Lo # [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; Lo # TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; Lo # [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; Lo # [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C80 ; Lo # KANNADA SIGN SPACING CANDRABINDU 0C85..0C8C ; Lo # [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L @@ -2242,7 +2257,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK 0CAA..0CB3 ; Lo # [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; Lo # [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; Lo # KANNADA SIGN AVAGRAHA -0CDD..0CDE ; Lo # [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; Lo # [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; Lo # [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CF1..0CF2 ; Lo # [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D04..0D0C ; Lo # [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -2452,6 +2467,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 10450..1049D ; Lo # [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO 10500..10527 ; Lo # [40] ELBASAN LETTER A..ELBASAN LETTER KHE 10530..10563 ; Lo # [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW +105C0..105F3 ; Lo # [52] TODHRI LETTER A..TODHRI LETTER OO 10600..10736 ; Lo # [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; Lo # [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; Lo # [8] LINEAR A SIGN A800..LINEAR A SIGN A807 @@ -2483,8 +2499,11 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 10B80..10B91 ; Lo # [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW 10C00..10C48 ; Lo # 
[73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH 10D00..10D23 ; Lo # [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D4A..10D4D ; Lo # [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4F ; Lo # GARAY SUKUN 10E80..10EA9 ; Lo # [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; Lo # [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EC2..10EC4 ; Lo # [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10F00..10F1C ; Lo # [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F27 ; Lo # OLD SOGDIAN LIGATURE AYIN-DALETH 10F30..10F45 ; Lo # [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN @@ -2556,6 +2575,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 11A5C..11A89 ; Lo # [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA 11A9D ; Lo # SOYOMBO MARK PLUTA 11AB0..11AF8 ; Lo # [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11BC0..11BE0 ; Lo # [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO 11C00..11C08 ; Lo # [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E ; Lo # [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA 11C40 ; Lo # BHAIKSUKI SIGN AVAGRAHA @@ -2655,7 +2675,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 132234 +# Total code points: 132329 # ================================================ @@ -2685,7 +2705,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I 0825..0827 ; Mn # [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; Mn # [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; Mn # [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK -0898..089F ; Mn # [8] ARABIC 
SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +0897..089F ; Mn # [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08CA..08E1 ; Mn # [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA 08E3..0902 ; Mn # [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA 093A ; Mn # DEVANAGARI VOWEL SIGN OE @@ -2883,8 +2903,9 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 10A3F ; Mn # KHAROSHTHI VIRAMA 10AE5..10AE6 ; Mn # [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 10D24..10D27 ; Mn # [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D69..10D6D ; Mn # [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; Mn # [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFD..10EFF ; Mn # [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10EFC..10EFF ; Mn # [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; Mn # [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; Mn # [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; Mn # BRAHMI SIGN ANUSVARA @@ -3008,7 +3029,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL 1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 1985 +# Total code points: 1992 # ================================================ @@ -3254,6 +3275,7 @@ ABF0..ABF9 ; Nd # [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE FF10..FF19 ; Nd # [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 104A0..104A9 ; Nd # [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE 10D30..10D39 ; Nd # [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10D40..10D49 ; Nd # [10] GARAY DIGIT ZERO..GARAY DIGIT NINE 11066..1106F ; Nd # [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE 110F0..110F9 ; Nd # [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE 
11136..1113F ; Nd # [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE @@ -3263,9 +3285,11 @@ FF10..FF19 ; Nd # [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 114D0..114D9 ; Nd # [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE 11650..11659 ; Nd # [10] MODI DIGIT ZERO..MODI DIGIT NINE 116C0..116C9 ; Nd # [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +116D0..116E3 ; Nd # [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11730..11739 ; Nd # [10] AHOM DIGIT ZERO..AHOM DIGIT NINE 118E0..118E9 ; Nd # [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE 11950..11959 ; Nd # [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE +11BF0..11BF9 ; Nd # [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 11C50..11C59 ; Nd # [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE 11D50..11D59 ; Nd # [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; Nd # [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE @@ -3280,7 +3304,7 @@ FF10..FF19 ; Nd # [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 1E950..1E959 ; Nd # [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; Nd # [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 680 +# Total code points: 720 # ================================================ @@ -3487,9 +3511,10 @@ FE31..FE32 ; Pd # [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FE58 ; Pd # SMALL EM DASH FE63 ; Pd # SMALL HYPHEN-MINUS FF0D ; Pd # FULLWIDTH HYPHEN-MINUS +10D6E ; Pd # GARAY HYPHEN 10EAD ; Pd # YEZIDI HYPHENATION MARK -# Total code points: 26 +# Total code points: 27 # ================================================ @@ -3848,6 +3873,7 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL 11A9A..11A9C ; Po # [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD 11A9E..11AA2 ; Po # [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 11B00..11B09 ; Po # [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU +11BE1 ; Po # SUNUWAR SIGN PVO 11C41..11C45 ; Po # [5] BHAIKSUKI 
DANDA..BHAIKSUKI GAP FILLER-2 11C70..11C71 ; Po # [2] MARCHEN HEAD MARK..MARCHEN MARK SHAD 11EF7..11EF8 ; Po # [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION @@ -3865,7 +3891,7 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL 1DA87..1DA8B ; Po # [5] SIGNWRITING COMMA..SIGNWRITING PARENTHESIS 1E95E..1E95F ; Po # [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK -# Total code points: 628 +# Total code points: 629 # ================================================ @@ -3924,6 +3950,7 @@ FF5C ; Sm # FULLWIDTH VERTICAL LINE FF5E ; Sm # FULLWIDTH TILDE FFE2 ; Sm # FULLWIDTH NOT SIGN FFE9..FFEC ; Sm # [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW +10D8E..10D8F ; Sm # [2] GARAY PLUS SIGN..GARAY MINUS SIGN 1D6C1 ; Sm # MATHEMATICAL BOLD NABLA 1D6DB ; Sm # MATHEMATICAL BOLD PARTIAL DIFFERENTIAL 1D6FB ; Sm # MATHEMATICAL ITALIC NABLA @@ -3936,7 +3963,7 @@ FFE9..FFEC ; Sm # [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW 1D7C3 ; Sm # MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL 1EEF0..1EEF1 ; Sm # [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL -# Total code points: 948 +# Total code points: 950 # ================================================ @@ -4137,6 +4164,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 16B3C..16B3F ; So # [4] PAHAWH HMONG SIGN XYEEM NTXIV..PAHAWH HMONG SIGN XYEEM FAIB 16B45 ; So # PAHAWH HMONG SIGN CIM TSOV ROG 1BC9C ; So # DUPLOYAN SIGN O WITH CROSS +1CEB0..1CEB3 ; So # [4] HORIZONTAL ZIGZAG LINE..BLACK RIGHT TRIANGLE CARET 1CF50..1CFC3 ; So # [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK 1D000..1D0F5 ; So # [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; So # [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -4181,7 +4209,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1F850..1F859 ; So # [10] 
LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW 1F860..1F887 ; So # [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW 1F890..1F8AD ; So # [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS -1F8B0..1F8B1 ; So # [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST +1F8B0..1F8B2 ; So # [3] ARROW POINTING UPWARDS THEN NORTH WEST..RIGHTWARDS ARROW WITH LOWER HOOK 1F900..1FA53 ; So # [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP 1FA60..1FA6D ; So # [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER 1FA70..1FA7C ; So # [13] BALLET SHOES..CRUTCH @@ -4194,7 +4222,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1FB00..1FB92 ; So # [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBCA ; So # [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON -# Total code points: 6639 +# Total code points: 6644 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt index 364847b91..2589107eb 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt @@ -1,5 +1,5 @@ -# DerivedJoiningGroup-15.1.0.txt -# Date: 2023-01-05, 20:34:37 GMT +# DerivedJoiningGroup-16.0.0.txt +# Date: 2023-10-02, 12:16:28 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use, see https://www.unicode.org/terms_of_use.html @@ -72,8 +72,9 @@ 06EE ; Dal # Lo ARABIC LETTER DAL WITH INVERTED V 0759..075A ; Dal # Lo [2] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW AND SMALL TAH..ARABIC LETTER DAL WITH INVERTED SMALL V BELOW 08AE ; Dal # Lo ARABIC LETTER DAL WITH THREE DOTS BELOW +10EC2 ; Dal # Lo ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW -# Total code points: 15 +# Total code points: 16 # ================================================ @@ -177,8 +178,9 @@ 06AC..06AE ; Kaf # Lo [3] ARABIC LETTER KAF WITH DOT ABOVE..ARABIC LETTER KAF WITH THREE DOTS BELOW 077F ; Kaf # Lo ARABIC LETTER KAF WITH TWO DOTS ABOVE 08B4 ; Kaf # Lo ARABIC LETTER KAF WITH DOT BELOW +10EC4 ; Kaf # Lo ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW -# Total code points: 6 +# Total code points: 7 # ================================================ @@ -331,8 +333,9 @@ 069F ; Tah # Lo ARABIC LETTER TAH WITH THREE DOTS ABOVE 088B..088C ; Tah # Lo [2] ARABIC LETTER TAH WITH DOT BELOW..ARABIC LETTER TAH WITH THREE DOTS BELOW 08A3 ; Tah # Lo ARABIC LETTER TAH WITH TWO DOTS ABOVE +10EC3 ; Tah # Lo ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW -# Total code points: 6 +# Total code points: 7 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt index a4e01e7d3..f18399146 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt @@ -1,5 +1,5 @@ -# DerivedJoiningType-15.1.0.txt -# Date: 2023-01-05, 20:34:38 GMT +# DerivedJoiningType-16.0.0.txt +# Date: 2023-10-23, 18:44:56 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use, see https://www.unicode.org/terms_of_use.html @@ -95,6 +95,7 @@ A840..A871 ; D # Lo [50] PHAGS-PA LETTER KA..PHAGS-PA SUBJOINED LETTER RA 10BAD..10BAE ; D # No [2] PSALTER PAHLAVI NUMBER TEN..PSALTER PAHLAVI NUMBER TWENTY 10D01..10D21 ; D # Lo [33] HANIFI ROHINGYA LETTER BA..HANIFI ROHINGYA VOWEL O 10D23 ; D # Lo HANIFI ROHINGYA MARK NA KHONNA +10EC3..10EC4 ; D # Lo [2] ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10F30..10F32 ; D # Lo [3] SOGDIAN LETTER ALEPH..SOGDIAN LETTER GIMEL 10F34..10F44 ; D # Lo [17] SOGDIAN LETTER WAW..SOGDIAN LETTER LESH 10F51..10F53 ; D # No [3] SOGDIAN NUMBER ONE..SOGDIAN NUMBER TWENTY @@ -110,7 +111,7 @@ A840..A871 ; D # Lo [50] PHAGS-PA LETTER KA..PHAGS-PA SUBJOINED LETTER RA 10FCA ; D # No CHORASMIAN NUMBER TWENTY 1E900..1E943 ; D # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 610 +# Total code points: 612 # ================================================ @@ -173,6 +174,7 @@ A840..A871 ; D # Lo [50] PHAGS-PA LETTER KA..PHAGS-PA SUBJOINED LETTER RA 10B91 ; R # Lo PSALTER PAHLAVI LETTER TAW 10BA9..10BAC ; R # No [4] PSALTER PAHLAVI NUMBER ONE..PSALTER PAHLAVI NUMBER FOUR 10D22 ; R # Lo HANIFI ROHINGYA MARK SAKIN +10EC2 ; R # Lo ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW 10F33 ; R # Lo SOGDIAN LETTER HE 10F54 ; R # No SOGDIAN NUMBER ONE HUNDRED 10F74..10F75 ; R # Lo [2] OLD UYGHUR LETTER ZAYIN..OLD UYGHUR LETTER FINAL HETH @@ -182,7 +184,7 @@ A840..A871 ; D # Lo [50] PHAGS-PA LETTER KA..PHAGS-PA SUBJOINED LETTER RA 10FC2..10FC3 ; R # Lo [2] CHORASMIAN LETTER RESH..CHORASMIAN LETTER SHIN 10FC9 ; R # No CHORASMIAN NUMBER TEN -# Total code points: 152 +# Total code points: 153 # ================================================ @@ -228,7 +230,7 @@ A872 ; L # Lo PHAGS-PA SUPERFIXED LETTER RA 0825..0827 ; T # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; T # Mn [5] SAMARITAN VOWEL SIGN LONG 
I..SAMARITAN MARK NEQUDAA 0859..085B ; T # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK -0898..089F ; T # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +0897..089F ; T # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08CA..08E1 ; T # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA 08E3..0902 ; T # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA 093A ; T # Mn DEVANAGARI VOWEL SIGN OE @@ -437,8 +439,9 @@ FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATI 10A3F ; T # Mn KHAROSHTHI VIRAMA 10AE5..10AE6 ; T # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 10D24..10D27 ; T # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D69..10D6D ; T # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; T # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFD..10EFF ; T # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10EFC..10EFF ; T # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; T # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; T # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11001 ; T # Mn BRAHMI SIGN ANUSVARA @@ -568,6 +571,6 @@ E0001 ; T # Cf LANGUAGE TAG E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2150 +# Total code points: 2157 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index c4c351d47..3b8bc4419 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-16.0.0.txt -# Date: 2023-10-03, 12:05:28 GMT +# Date: 2023-10-24, 09:19:40 GMT # © 2023 Unicode®, Inc. 
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -68,8 +68,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 762724 code points not listed here. -# Total code points: 900192 +# The above property value applies to 762526 code points not listed here. +# Total code points: 899994 # ================================================ @@ -521,6 +521,7 @@ A9F0..A9F9 ; NU # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DI ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 104A0..104A9 ; NU # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE 10D30..10D39 ; NU # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10D40..10D49 ; NU # Nd [10] GARAY DIGIT ZERO..GARAY DIGIT NINE 110BD ; NU # Cf KAITHI NUMBER SIGN 110CD ; NU # Cf KAITHI NUMBER SIGN ABOVE 110F0..110F9 ; NU # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE @@ -531,8 +532,10 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 114D0..114D9 ; NU # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE 11650..11659 ; NU # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE 116C0..116C9 ; NU # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +116D0..116E3 ; NU # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11730..11739 ; NU # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE 118E0..118E9 ; NU # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE +11BF0..11BF9 ; NU # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 11C50..11C59 ; NU # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE 11D50..11D59 ; NU # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; NU # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE @@ -546,7 +549,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 
1E950..1E959 ; NU # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; NU # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 624 +# Total code points: 664 # ================================================ @@ -734,7 +737,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 0C2A..0C39 ; AL # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA 0C3D ; AL # Lo TELUGU SIGN AVAGRAHA 0C58..0C5A ; AL # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA -0C5D ; AL # Lo TELUGU LETTER NAKAARA POLLU +0C5C..0C5D ; AL # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU 0C60..0C61 ; AL # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL 0C78..0C7E ; AL # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR 0C7F ; AL # So TELUGU SIGN TUUMU @@ -745,7 +748,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 0CAA..0CB3 ; AL # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA 0CB5..0CB9 ; AL # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA 0CBD ; AL # Lo KANNADA SIGN AVAGRAHA -0CDD..0CDE ; AL # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CDC..0CDE ; AL # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA 0CE0..0CE1 ; AL # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL 0CF1..0CF2 ; AL # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA 0D04..0D0C ; AL # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L @@ -851,7 +854,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE 1C4D..1C4F ; AL # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA 1C5A..1C77 ; AL # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH 1C78..1C7D ; AL # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD -1C80..1C88 ; AL # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C80..1C8A ; AL # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE 1C90..1CBA ; AL # L& [43] GEORGIAN MTAVRULI 
CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN 1CBD..1CBF ; AL # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 1CC0..1CC7 ; AL # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA @@ -1238,6 +1241,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 105A3..105B1 ; AL # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE 105B3..105B9 ; AL # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE 105BB..105BC ; AL # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +105C0..105F3 ; AL # Lo [52] TODHRI LETTER A..TODHRI LETTER OO 10600..10736 ; AL # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 10740..10755 ; AL # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE 10760..10767 ; AL # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 @@ -1296,9 +1300,17 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 10CC0..10CF2 ; AL # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US 10CFA..10CFF ; AL # No [6] OLD HUNGARIAN NUMBER ONE..OLD HUNGARIAN NUMBER ONE THOUSAND 10D00..10D23 ; AL # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D4A..10D4D ; AL # Lo [4] GARAY VOWEL SIGN A..GARAY VOWEL SIGN EE +10D4E ; AL # Lm GARAY VOWEL LENGTH MARK +10D4F ; AL # Lo GARAY SUKUN +10D50..10D65 ; AL # L& [22] GARAY CAPITAL LETTER A..GARAY CAPITAL LETTER OLD NA +10D6F ; AL # Lm GARAY REDUPLICATION MARK +10D70..10D85 ; AL # L& [22] GARAY SMALL LETTER A..GARAY SMALL LETTER OLD NA +10D8E..10D8F ; AL # Sm [2] GARAY PLUS SIGN..GARAY MINUS SIGN 10E60..10E7E ; AL # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS 10E80..10EA9 ; AL # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET 10EB0..10EB1 ; AL # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW 10F00..10F1C ; AL # 
Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL 10F1D..10F26 ; AL # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF 10F27 ; AL # Lo OLD SOGDIAN LIGATURE AYIN-DALETH @@ -1373,6 +1385,8 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 11A5C..11A89 ; AL # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA 11A9D ; AL # Lo SOYOMBO MARK PLUTA 11AB0..11AF8 ; AL # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11BC0..11BE0 ; AL # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO +11BE1 ; AL # Po SUNUWAR SIGN PVO 11C00..11C08 ; AL # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L 11C0A..11C2E ; AL # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA 11C40 ; AL # Lo BHAIKSUKI SIGN AVAGRAHA @@ -1430,6 +1444,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1BC80..1BC88 ; AL # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL 1BC90..1BC99 ; AL # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW 1BC9C ; AL # So DUPLOYAN SIGN O WITH CROSS +1CEB0..1CEB3 ; AL # So [4] HORIZONTAL ZIGZAG LINE..BLACK RIGHT TRIANGLE CARET 1CF50..1CFC3 ; AL # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK 1D000..1D0F5 ; AL # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO 1D100..1D126 ; AL # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 @@ -1575,12 +1590,13 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1F850..1F859 ; AL # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW 1F860..1F887 ; AL # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW 1F890..1F8AD ; AL # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS +1F8B2 ; AL # So RIGHTWARDS ARROW WITH LOWER HOOK 1F900..1F90B ; AL # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT 1FA00..1FA53 ; AL # So 
[84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBCA ; AL # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON -# Total code points: 21730 +# Total code points: 21881 # ================================================ @@ -1823,8 +1839,8 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 30000..3134A ; ID # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# The above property value applies to 61978 code points not listed here. -# Total code points: 172568 +# The above property value applies to 61977 code points not listed here. +# Total code points: 172567 # ================================================ @@ -1881,7 +1897,7 @@ FE19 ; IN # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS 0825..0827 ; CM # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U 0829..082D ; CM # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA 0859..085B ; CM # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK -0898..089F ; CM # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +0897..089F ; CM # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA 08CA..08E1 ; CM # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA 08E3..0902 ; CM # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA 0903 ; CM # Mc DEVANAGARI SIGN VISARGA @@ -2145,8 +2161,9 @@ FFF9..FFFB ; CM # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTAT 10A3F ; CM # Mn KHAROSHTHI VIRAMA 10AE5..10AE6 ; CM # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW 10D24..10D27 ; CM # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D69..10D6D ; CM # Mn [5] GARAY VOWEL SIGN E..GARAY CONSONANT NASALIZATION MARK 10EAB..10EAC ; CM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING 
MADDA MARK -10EFD..10EFF ; CM # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA +10EFC..10EFF ; CM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA 10F46..10F50 ; CM # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW 10F82..10F85 ; CM # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW 11000 ; CM # Mc BRAHMI SIGN CANDRABINDU @@ -2336,7 +2353,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2429 +# Total code points: 2436 # ================================================ @@ -2450,6 +2467,7 @@ ABEB ; BA # Po MEETEI MAYEK CHEIKHEI 10A50..10A57 ; BA # Po [8] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION DOUBLE DANDA 10AF0..10AF5 ; BA # Po [6] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION TWO DOTS 10B39..10B3F ; BA # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION +10D6E ; BA # Pd GARAY HYPHEN 10EAD ; BA # Pd YEZIDI HYPHENATION MARK 11047..11048 ; BA # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA 110BE..110C1 ; BA # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA @@ -2486,7 +2504,7 @@ ABEB ; BA # Po MEETEI MAYEK CHEIKHEI 1BC9F ; BA # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1DA87..1DA8A ; BA # Po [4] SIGNWRITING COMMA..SIGNWRITING COLON -# Total code points: 266 +# Total code points: 267 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 649205580..053a07889 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2023-10-03, 12:05:28 GMT +# Date: 2023-10-24, 09:19:40 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. 
and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -2098,6 +2098,7 @@ 088E ; ARABIC VERTICAL TAIL 0890 ; ARABIC POUND MARK ABOVE 0891 ; ARABIC PIASTRE MARK ABOVE +0897 ; ARABIC PEPET 0898 ; ARABIC SMALL HIGH WORD AL-JUZ 0899 ; ARABIC SMALL LOW WORD ISHMAAM 089A ; ARABIC SMALL LOW WORD IMAALA @@ -2836,6 +2837,7 @@ 0C58 ; TELUGU LETTER TSA 0C59 ; TELUGU LETTER DZA 0C5A ; TELUGU LETTER RRRA +0C5C ; TELUGU ARCHAIC SHRII 0C5D ; TELUGU LETTER NAKAARA POLLU 0C60 ; TELUGU LETTER VOCALIC RR 0C61 ; TELUGU LETTER VOCALIC LL @@ -2932,6 +2934,7 @@ 0CCD ; KANNADA SIGN VIRAMA 0CD5 ; KANNADA LENGTH MARK 0CD6 ; KANNADA AI LENGTH MARK +0CDC ; KANNADA ARCHAIC SHRII 0CDD ; KANNADA LETTER NAKAARA POLLU 0CDE ; KANNADA LETTER FA 0CE0 ; KANNADA LETTER VOCALIC RR @@ -6486,6 +6489,8 @@ 1C86 ; CYRILLIC SMALL LETTER TALL HARD SIGN 1C87 ; CYRILLIC SMALL LETTER TALL YAT 1C88 ; CYRILLIC SMALL LETTER UNBLENDED UK +1C89 ; CYRILLIC CAPITAL LETTER TJE +1C8A ; CYRILLIC SMALL LETTER TJE 1C90 ; GEORGIAN MTAVRULI CAPITAL LETTER AN 1C91 ; GEORGIAN MTAVRULI CAPITAL LETTER BAN 1C92 ; GEORGIAN MTAVRULI CAPITAL LETTER GAN @@ -28667,6 +28672,58 @@ FFFD ; REPLACEMENT CHARACTER 105B9 ; VITHKUQI SMALL LETTER XE 105BB ; VITHKUQI SMALL LETTER Y 105BC ; VITHKUQI SMALL LETTER ZE +105C0 ; TODHRI LETTER A +105C1 ; TODHRI LETTER AS +105C2 ; TODHRI LETTER BA +105C3 ; TODHRI LETTER MBA +105C4 ; TODHRI LETTER CA +105C5 ; TODHRI LETTER CHA +105C6 ; TODHRI LETTER DA +105C7 ; TODHRI LETTER NDA +105C8 ; TODHRI LETTER DHA +105C9 ; TODHRI LETTER EI +105CA ; TODHRI LETTER E +105CB ; TODHRI LETTER FA +105CC ; TODHRI LETTER GA +105CD ; TODHRI LETTER NGA +105CE ; TODHRI LETTER GJA +105CF ; TODHRI LETTER NGJA +105D0 ; TODHRI LETTER HA +105D1 ; TODHRI LETTER HJA +105D2 ; TODHRI LETTER I +105D3 ; TODHRI LETTER JA +105D4 ; TODHRI LETTER KA +105D5 ; TODHRI LETTER LA +105D6 ; TODHRI LETTER LLA +105D7 ; TODHRI LETTER MA +105D8 ; TODHRI LETTER NA +105D9 ; TODHRI LETTER NJAN +105DA ; TODHRI LETTER O 
+105DB ; TODHRI LETTER PA +105DC ; TODHRI LETTER QA +105DD ; TODHRI LETTER RA +105DE ; TODHRI LETTER RRA +105DF ; TODHRI LETTER SA +105E0 ; TODHRI LETTER SHA +105E1 ; TODHRI LETTER SHTA +105E2 ; TODHRI LETTER TA +105E3 ; TODHRI LETTER THA +105E4 ; TODHRI LETTER U +105E5 ; TODHRI LETTER VA +105E6 ; TODHRI LETTER XA +105E7 ; TODHRI LETTER NXA +105E8 ; TODHRI LETTER XHA +105E9 ; TODHRI LETTER NXHA +105EA ; TODHRI LETTER Y +105EB ; TODHRI LETTER JY +105EC ; TODHRI LETTER ZA +105ED ; TODHRI LETTER ZHA +105EE ; TODHRI LETTER GHA +105EF ; TODHRI LETTER STA +105F0 ; TODHRI LETTER SKAN +105F1 ; TODHRI LETTER KHA +105F2 ; TODHRI LETTER PSA +105F3 ; TODHRI LETTER OO 10600 ; LINEAR A SIGN AB001 10601 ; LINEAR A SIGN AB002 10602 ; LINEAR A SIGN AB003 @@ -29988,6 +30045,75 @@ FFFD ; REPLACEMENT CHARACTER 10D37 ; HANIFI ROHINGYA DIGIT SEVEN 10D38 ; HANIFI ROHINGYA DIGIT EIGHT 10D39 ; HANIFI ROHINGYA DIGIT NINE +10D40 ; GARAY DIGIT ZERO +10D41 ; GARAY DIGIT ONE +10D42 ; GARAY DIGIT TWO +10D43 ; GARAY DIGIT THREE +10D44 ; GARAY DIGIT FOUR +10D45 ; GARAY DIGIT FIVE +10D46 ; GARAY DIGIT SIX +10D47 ; GARAY DIGIT SEVEN +10D48 ; GARAY DIGIT EIGHT +10D49 ; GARAY DIGIT NINE +10D4A ; GARAY VOWEL SIGN A +10D4B ; GARAY VOWEL SIGN I +10D4C ; GARAY VOWEL SIGN O +10D4D ; GARAY VOWEL SIGN EE +10D4E ; GARAY VOWEL LENGTH MARK +10D4F ; GARAY SUKUN +10D50 ; GARAY CAPITAL LETTER A +10D51 ; GARAY CAPITAL LETTER CA +10D52 ; GARAY CAPITAL LETTER MA +10D53 ; GARAY CAPITAL LETTER KA +10D54 ; GARAY CAPITAL LETTER BA +10D55 ; GARAY CAPITAL LETTER JA +10D56 ; GARAY CAPITAL LETTER SA +10D57 ; GARAY CAPITAL LETTER WA +10D58 ; GARAY CAPITAL LETTER LA +10D59 ; GARAY CAPITAL LETTER GA +10D5A ; GARAY CAPITAL LETTER DA +10D5B ; GARAY CAPITAL LETTER XA +10D5C ; GARAY CAPITAL LETTER YA +10D5D ; GARAY CAPITAL LETTER TA +10D5E ; GARAY CAPITAL LETTER RA +10D5F ; GARAY CAPITAL LETTER NYA +10D60 ; GARAY CAPITAL LETTER FA +10D61 ; GARAY CAPITAL LETTER NA +10D62 ; GARAY CAPITAL LETTER PA +10D63 ; GARAY CAPITAL LETTER HA 
+10D64 ; GARAY CAPITAL LETTER OLD KA +10D65 ; GARAY CAPITAL LETTER OLD NA +10D69 ; GARAY VOWEL SIGN E +10D6A ; GARAY CONSONANT GEMINATION MARK +10D6B ; GARAY COMBINING DOT ABOVE +10D6C ; GARAY COMBINING DOUBLE DOT ABOVE +10D6D ; GARAY CONSONANT NASALIZATION MARK +10D6E ; GARAY HYPHEN +10D6F ; GARAY REDUPLICATION MARK +10D70 ; GARAY SMALL LETTER A +10D71 ; GARAY SMALL LETTER CA +10D72 ; GARAY SMALL LETTER MA +10D73 ; GARAY SMALL LETTER KA +10D74 ; GARAY SMALL LETTER BA +10D75 ; GARAY SMALL LETTER JA +10D76 ; GARAY SMALL LETTER SA +10D77 ; GARAY SMALL LETTER WA +10D78 ; GARAY SMALL LETTER LA +10D79 ; GARAY SMALL LETTER GA +10D7A ; GARAY SMALL LETTER DA +10D7B ; GARAY SMALL LETTER XA +10D7C ; GARAY SMALL LETTER YA +10D7D ; GARAY SMALL LETTER TA +10D7E ; GARAY SMALL LETTER RA +10D7F ; GARAY SMALL LETTER NYA +10D80 ; GARAY SMALL LETTER FA +10D81 ; GARAY SMALL LETTER NA +10D82 ; GARAY SMALL LETTER PA +10D83 ; GARAY SMALL LETTER HA +10D84 ; GARAY SMALL LETTER OLD KA +10D85 ; GARAY SMALL LETTER OLD NA +10D8E ; GARAY PLUS SIGN +10D8F ; GARAY MINUS SIGN 10E60 ; RUMI DIGIT ONE 10E61 ; RUMI DIGIT TWO 10E62 ; RUMI DIGIT THREE @@ -30066,6 +30192,10 @@ FFFD ; REPLACEMENT CHARACTER 10EAD ; YEZIDI HYPHENATION MARK 10EB0 ; YEZIDI LETTER LAM WITH DOT ABOVE 10EB1 ; YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EC2 ; ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW +10EC3 ; ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW +10EC4 ; ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW +10EFC ; ARABIC COMBINING ALEF OVERLAY 10EFD ; ARABIC SMALL LOW WORD SAKTA 10EFE ; ARABIC SMALL LOW WORD QASR 10EFF ; ARABIC SMALL LOW WORD MADDA @@ -31361,6 +31491,26 @@ FFFD ; REPLACEMENT CHARACTER 116C7 ; TAKRI DIGIT SEVEN 116C8 ; TAKRI DIGIT EIGHT 116C9 ; TAKRI DIGIT NINE +116D0 ; MYANMAR PAO DIGIT ZERO +116D1 ; MYANMAR PAO DIGIT ONE +116D2 ; MYANMAR PAO DIGIT TWO +116D3 ; MYANMAR PAO DIGIT THREE +116D4 ; MYANMAR PAO DIGIT FOUR +116D5 ; MYANMAR PAO DIGIT FIVE +116D6 ; MYANMAR PAO DIGIT SIX +116D7 ; MYANMAR PAO 
DIGIT SEVEN +116D8 ; MYANMAR PAO DIGIT EIGHT +116D9 ; MYANMAR PAO DIGIT NINE +116DA ; MYANMAR EASTERN PWO KAREN DIGIT ZERO +116DB ; MYANMAR EASTERN PWO KAREN DIGIT ONE +116DC ; MYANMAR EASTERN PWO KAREN DIGIT TWO +116DD ; MYANMAR EASTERN PWO KAREN DIGIT THREE +116DE ; MYANMAR EASTERN PWO KAREN DIGIT FOUR +116DF ; MYANMAR EASTERN PWO KAREN DIGIT FIVE +116E0 ; MYANMAR EASTERN PWO KAREN DIGIT SIX +116E1 ; MYANMAR EASTERN PWO KAREN DIGIT SEVEN +116E2 ; MYANMAR EASTERN PWO KAREN DIGIT EIGHT +116E3 ; MYANMAR EASTERN PWO KAREN DIGIT NINE 11700 ; AHOM LETTER KA 11701 ; AHOM LETTER KHA 11702 ; AHOM LETTER NGA @@ -31945,6 +32095,50 @@ FFFD ; REPLACEMENT CHARACTER 11B07 ; DEVANAGARI SIGN WESTERN NINE-LIKE BHALE 11B08 ; DEVANAGARI SIGN REVERSED NINE-LIKE BHALE 11B09 ; DEVANAGARI SIGN MINDU +11BC0 ; SUNUWAR LETTER DEVI +11BC1 ; SUNUWAR LETTER TASLA +11BC2 ; SUNUWAR LETTER EKO +11BC3 ; SUNUWAR LETTER IMAR +11BC4 ; SUNUWAR LETTER REU +11BC5 ; SUNUWAR LETTER UTTHI +11BC6 ; SUNUWAR LETTER KIK +11BC7 ; SUNUWAR LETTER MA +11BC8 ; SUNUWAR LETTER APPHO +11BC9 ; SUNUWAR LETTER PIP +11BCA ; SUNUWAR LETTER GIL +11BCB ; SUNUWAR LETTER HAMSO +11BCC ; SUNUWAR LETTER CARMI +11BCD ; SUNUWAR LETTER NAH +11BCE ; SUNUWAR LETTER BUR +11BCF ; SUNUWAR LETTER JYAH +11BD0 ; SUNUWAR LETTER LOACHA +11BD1 ; SUNUWAR LETTER OTTHI +11BD2 ; SUNUWAR LETTER SHYELE +11BD3 ; SUNUWAR LETTER VARCA +11BD4 ; SUNUWAR LETTER YAT +11BD5 ; SUNUWAR LETTER AVA +11BD6 ; SUNUWAR LETTER AAL +11BD7 ; SUNUWAR LETTER DONGA +11BD8 ; SUNUWAR LETTER THARI +11BD9 ; SUNUWAR LETTER PHAR +11BDA ; SUNUWAR LETTER NGAR +11BDB ; SUNUWAR LETTER KHA +11BDC ; SUNUWAR LETTER SHYER +11BDD ; SUNUWAR LETTER CHELAP +11BDE ; SUNUWAR LETTER TENTU +11BDF ; SUNUWAR LETTER THELE +11BE0 ; SUNUWAR LETTER KLOKO +11BE1 ; SUNUWAR SIGN PVO +11BF0 ; SUNUWAR DIGIT ZERO +11BF1 ; SUNUWAR DIGIT ONE +11BF2 ; SUNUWAR DIGIT TWO +11BF3 ; SUNUWAR DIGIT THREE +11BF4 ; SUNUWAR DIGIT FOUR +11BF5 ; SUNUWAR DIGIT FIVE +11BF6 ; SUNUWAR DIGIT SIX +11BF7 ; SUNUWAR DIGIT 
SEVEN +11BF8 ; SUNUWAR DIGIT EIGHT +11BF9 ; SUNUWAR DIGIT NINE 11C00 ; BHAIKSUKI LETTER A 11C01 ; BHAIKSUKI LETTER AA 11C02 ; BHAIKSUKI LETTER I @@ -37780,6 +37974,10 @@ FFFD ; REPLACEMENT CHARACTER 1BCA1 ; SHORTHAND FORMAT CONTINUING OVERLAP 1BCA2 ; SHORTHAND FORMAT DOWN STEP 1BCA3 ; SHORTHAND FORMAT UP STEP +1CEB0 ; HORIZONTAL ZIGZAG LINE +1CEB1 ; KEYHOLE +1CEB2 ; OLD PERSONAL COMPUTER WITH MONITOR IN PORTRAIT ORIENTATION +1CEB3 ; BLACK RIGHT TRIANGLE CARET 1CF00 ; ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT 1CF01 ; ZNAMENNY COMBINING MARK NIZKO S KRYZHEM ON LEFT 1CF02 ; ZNAMENNY COMBINING MARK TSATA ON LEFT @@ -43157,6 +43355,7 @@ FFFD ; REPLACEMENT CHARACTER 1F8AD ; WHITE ARROW SHAFT WIDTH TWO THIRDS 1F8B0 ; ARROW POINTING UPWARDS THEN NORTH WEST 1F8B1 ; ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST +1F8B2 ; RIGHTWARDS ARROW WITH LOWER HOOK 1F900 ; CIRCLED CROSS FORMEE WITH FOUR DOTS 1F901 ; CIRCLED CROSS FORMEE WITH TWO DOTS 1F902 ; CIRCLED CROSS FORMEE @@ -44177,6 +44376,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 149814 +# Total code points: 150013 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt b/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt index 062f4fbe5..438477313 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt @@ -1,5 +1,5 @@ -# DerivedNumericType-15.1.0.txt -# Date: 2023-01-05, 20:34:41 GMT +# DerivedNumericType-16.0.0.txt +# Date: 2023-10-23, 18:44:59 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use, see https://www.unicode.org/terms_of_use.html @@ -260,6 +260,7 @@ ABF0..ABF9 ; Decimal # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT N FF10..FF19 ; Decimal # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 104A0..104A9 ; Decimal # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE 10D30..10D39 ; Decimal # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10D40..10D49 ; Decimal # Nd [10] GARAY DIGIT ZERO..GARAY DIGIT NINE 11066..1106F ; Decimal # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE 110F0..110F9 ; Decimal # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE 11136..1113F ; Decimal # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE @@ -269,9 +270,11 @@ FF10..FF19 ; Decimal # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 114D0..114D9 ; Decimal # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE 11650..11659 ; Decimal # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE 116C0..116C9 ; Decimal # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +116D0..116E3 ; Decimal # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE 11730..11739 ; Decimal # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE 118E0..118E9 ; Decimal # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE 11950..11959 ; Decimal # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE +11BF0..11BF9 ; Decimal # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE 11C50..11C59 ; Decimal # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE 11D50..11D59 ; Decimal # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE 11DA0..11DA9 ; Decimal # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE @@ -286,6 +289,6 @@ FF10..FF19 ; Decimal # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE 1E950..1E959 ; Decimal # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE 1FBF0..1FBF9 ; Decimal # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE -# Total code points: 680 +# Total code points: 720 # EOF diff --git a/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt 
b/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt index e67164682..c4bbab74b 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt @@ -1,5 +1,5 @@ -# DerivedNumericValues-15.1.0.txt -# Date: 2023-01-05, 20:34:41 GMT +# DerivedNumericValues-16.0.0.txt +# Date: 2023-10-23, 18:45:00 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -91,6 +91,7 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO 1018A ; 0.0 ; ; 0 # No GREEK ZERO SIGN 104A0 ; 0.0 ; ; 0 # Nd OSMANYA DIGIT ZERO 10D30 ; 0.0 ; ; 0 # Nd HANIFI ROHINGYA DIGIT ZERO +10D40 ; 0.0 ; ; 0 # Nd GARAY DIGIT ZERO 11066 ; 0.0 ; ; 0 # Nd BRAHMI DIGIT ZERO 110F0 ; 0.0 ; ; 0 # Nd SORA SOMPENG DIGIT ZERO 11136 ; 0.0 ; ; 0 # Nd CHAKMA DIGIT ZERO @@ -100,9 +101,12 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO 114D0 ; 0.0 ; ; 0 # Nd TIRHUTA DIGIT ZERO 11650 ; 0.0 ; ; 0 # Nd MODI DIGIT ZERO 116C0 ; 0.0 ; ; 0 # Nd TAKRI DIGIT ZERO +116D0 ; 0.0 ; ; 0 # Nd MYANMAR PAO DIGIT ZERO +116DA ; 0.0 ; ; 0 # Nd MYANMAR EASTERN PWO KAREN DIGIT ZERO 11730 ; 0.0 ; ; 0 # Nd AHOM DIGIT ZERO 118E0 ; 0.0 ; ; 0 # Nd WARANG CITI DIGIT ZERO 11950 ; 0.0 ; ; 0 # Nd DIVES AKURU DIGIT ZERO +11BF0 ; 0.0 ; ; 0 # Nd SUNUWAR DIGIT ZERO 11C50 ; 0.0 ; ; 0 # Nd BHAIKSUKI DIGIT ZERO 11D50 ; 0.0 ; ; 0 # Nd MASARAM GONDI DIGIT ZERO 11DA0 ; 0.0 ; ; 0 # Nd GUNJALA GONDI DIGIT ZERO @@ -126,7 +130,7 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO 1F10B..1F10C ; 0.0 ; ; 0 # No [2] DINGBAT CIRCLED SANS-SERIF DIGIT ZERO..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO 1FBF0 ; 0.0 ; ; 0 # Nd SEGMENTED DIGIT ZERO -# Total code points: 88 +# Total code points: 92 # ================================================ @@ -496,6 +500,7 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE 10BA9 ; 1.0 ; ; 1 # No PSALTER PAHLAVI NUMBER ONE 10CFA ; 
1.0 ; ; 1 # No OLD HUNGARIAN NUMBER ONE 10D31 ; 1.0 ; ; 1 # Nd HANIFI ROHINGYA DIGIT ONE +10D41 ; 1.0 ; ; 1 # Nd GARAY DIGIT ONE 10E60 ; 1.0 ; ; 1 # No RUMI DIGIT ONE 10F1D ; 1.0 ; ; 1 # No OLD SOGDIAN NUMBER ONE 10F51 ; 1.0 ; ; 1 # No SOGDIAN NUMBER ONE @@ -511,9 +516,12 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE 114D1 ; 1.0 ; ; 1 # Nd TIRHUTA DIGIT ONE 11651 ; 1.0 ; ; 1 # Nd MODI DIGIT ONE 116C1 ; 1.0 ; ; 1 # Nd TAKRI DIGIT ONE +116D1 ; 1.0 ; ; 1 # Nd MYANMAR PAO DIGIT ONE +116DB ; 1.0 ; ; 1 # Nd MYANMAR EASTERN PWO KAREN DIGIT ONE 11731 ; 1.0 ; ; 1 # Nd AHOM DIGIT ONE 118E1 ; 1.0 ; ; 1 # Nd WARANG CITI DIGIT ONE 11951 ; 1.0 ; ; 1 # Nd DIVES AKURU DIGIT ONE +11BF1 ; 1.0 ; ; 1 # Nd SUNUWAR DIGIT ONE 11C51 ; 1.0 ; ; 1 # Nd BHAIKSUKI DIGIT ONE 11C5A ; 1.0 ; ; 1 # No BHAIKSUKI NUMBER ONE 11D51 ; 1.0 ; ; 1 # Nd MASARAM GONDI DIGIT ONE @@ -553,7 +561,7 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE 1FBF1 ; 1.0 ; ; 1 # Nd SEGMENTED DIGIT ONE 2092A ; 1.0 ; ; 1 # Lo CJK UNIFIED IDEOGRAPH-2092A -# Total code points: 144 +# Total code points: 148 # ================================================ @@ -647,6 +655,7 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO 10B79 ; 2.0 ; ; 2 # No INSCRIPTIONAL PAHLAVI NUMBER TWO 10BAA ; 2.0 ; ; 2 # No PSALTER PAHLAVI NUMBER TWO 10D32 ; 2.0 ; ; 2 # Nd HANIFI ROHINGYA DIGIT TWO +10D42 ; 2.0 ; ; 2 # Nd GARAY DIGIT TWO 10E61 ; 2.0 ; ; 2 # No RUMI DIGIT TWO 10F1E ; 2.0 ; ; 2 # No OLD SOGDIAN NUMBER TWO 10FC6 ; 2.0 ; ; 2 # No CHORASMIAN NUMBER TWO @@ -661,9 +670,12 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO 114D2 ; 2.0 ; ; 2 # Nd TIRHUTA DIGIT TWO 11652 ; 2.0 ; ; 2 # Nd MODI DIGIT TWO 116C2 ; 2.0 ; ; 2 # Nd TAKRI DIGIT TWO +116D2 ; 2.0 ; ; 2 # Nd MYANMAR PAO DIGIT TWO +116DC ; 2.0 ; ; 2 # Nd MYANMAR EASTERN PWO KAREN DIGIT TWO 11732 ; 2.0 ; ; 2 # Nd AHOM DIGIT TWO 118E2 ; 2.0 ; ; 2 # Nd WARANG CITI DIGIT TWO 11952 ; 2.0 ; ; 2 # Nd DIVES AKURU DIGIT TWO +11BF2 ; 2.0 ; ; 2 # Nd SUNUWAR DIGIT TWO 11C52 ; 2.0 ; ; 2 # Nd BHAIKSUKI DIGIT TWO 11C5B ; 
2.0 ; ; 2 # No BHAIKSUKI NUMBER TWO 11D52 ; 2.0 ; ; 2 # Nd MASARAM GONDI DIGIT TWO @@ -707,7 +719,7 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO 1FBF2 ; 2.0 ; ; 2 # Nd SEGMENTED DIGIT TWO 22390 ; 2.0 ; ; 2 # Lo CJK UNIFIED IDEOGRAPH-22390 -# Total code points: 146 +# Total code points: 150 # ================================================ @@ -792,6 +804,7 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE 10B7A ; 3.0 ; ; 3 # No INSCRIPTIONAL PAHLAVI NUMBER THREE 10BAB ; 3.0 ; ; 3 # No PSALTER PAHLAVI NUMBER THREE 10D33 ; 3.0 ; ; 3 # Nd HANIFI ROHINGYA DIGIT THREE +10D43 ; 3.0 ; ; 3 # Nd GARAY DIGIT THREE 10E62 ; 3.0 ; ; 3 # No RUMI DIGIT THREE 10F1F ; 3.0 ; ; 3 # No OLD SOGDIAN NUMBER THREE 10FC7 ; 3.0 ; ; 3 # No CHORASMIAN NUMBER THREE @@ -806,9 +819,12 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE 114D3 ; 3.0 ; ; 3 # Nd TIRHUTA DIGIT THREE 11653 ; 3.0 ; ; 3 # Nd MODI DIGIT THREE 116C3 ; 3.0 ; ; 3 # Nd TAKRI DIGIT THREE +116D3 ; 3.0 ; ; 3 # Nd MYANMAR PAO DIGIT THREE +116DD ; 3.0 ; ; 3 # Nd MYANMAR EASTERN PWO KAREN DIGIT THREE 11733 ; 3.0 ; ; 3 # Nd AHOM DIGIT THREE 118E3 ; 3.0 ; ; 3 # Nd WARANG CITI DIGIT THREE 11953 ; 3.0 ; ; 3 # Nd DIVES AKURU DIGIT THREE +11BF3 ; 3.0 ; ; 3 # Nd SUNUWAR DIGIT THREE 11C53 ; 3.0 ; ; 3 # Nd BHAIKSUKI DIGIT THREE 11C5C ; 3.0 ; ; 3 # No BHAIKSUKI NUMBER THREE 11D53 ; 3.0 ; ; 3 # Nd MASARAM GONDI DIGIT THREE @@ -855,7 +871,7 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE 22998 ; 3.0 ; ; 3 # Lo CJK UNIFIED IDEOGRAPH-22998 23B1B ; 3.0 ; ; 3 # Lo CJK UNIFIED IDEOGRAPH-23B1B -# Total code points: 144 +# Total code points: 148 # ================================================ @@ -934,6 +950,7 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR 10B7B ; 4.0 ; ; 4 # No INSCRIPTIONAL PAHLAVI NUMBER FOUR 10BAC ; 4.0 ; ; 4 # No PSALTER PAHLAVI NUMBER FOUR 10D34 ; 4.0 ; ; 4 # Nd HANIFI ROHINGYA DIGIT FOUR +10D44 ; 4.0 ; ; 4 # Nd GARAY DIGIT FOUR 10E63 ; 4.0 ; ; 4 # No RUMI DIGIT FOUR 10F20 ; 4.0 ; ; 4 # No OLD SOGDIAN NUMBER FOUR 10FC8 ; 4.0 ; ; 4 # 
No CHORASMIAN NUMBER FOUR @@ -948,9 +965,12 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR 114D4 ; 4.0 ; ; 4 # Nd TIRHUTA DIGIT FOUR 11654 ; 4.0 ; ; 4 # Nd MODI DIGIT FOUR 116C4 ; 4.0 ; ; 4 # Nd TAKRI DIGIT FOUR +116D4 ; 4.0 ; ; 4 # Nd MYANMAR PAO DIGIT FOUR +116DE ; 4.0 ; ; 4 # Nd MYANMAR EASTERN PWO KAREN DIGIT FOUR 11734 ; 4.0 ; ; 4 # Nd AHOM DIGIT FOUR 118E4 ; 4.0 ; ; 4 # Nd WARANG CITI DIGIT FOUR 11954 ; 4.0 ; ; 4 # Nd DIVES AKURU DIGIT FOUR +11BF4 ; 4.0 ; ; 4 # Nd SUNUWAR DIGIT FOUR 11C54 ; 4.0 ; ; 4 # Nd BHAIKSUKI DIGIT FOUR 11C5D ; 4.0 ; ; 4 # No BHAIKSUKI NUMBER FOUR 11D54 ; 4.0 ; ; 4 # Nd MASARAM GONDI DIGIT FOUR @@ -996,7 +1016,7 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR 200E2 ; 4.0 ; ; 4 # Lo CJK UNIFIED IDEOGRAPH-200E2 2626D ; 4.0 ; ; 4 # Lo CJK UNIFIED IDEOGRAPH-2626D -# Total code points: 135 +# Total code points: 139 # ================================================ @@ -1080,6 +1100,7 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE 10AEC ; 5.0 ; ; 5 # No MANICHAEAN NUMBER FIVE 10CFB ; 5.0 ; ; 5 # No OLD HUNGARIAN NUMBER FIVE 10D35 ; 5.0 ; ; 5 # Nd HANIFI ROHINGYA DIGIT FIVE +10D45 ; 5.0 ; ; 5 # Nd GARAY DIGIT FIVE 10E64 ; 5.0 ; ; 5 # No RUMI DIGIT FIVE 10F21 ; 5.0 ; ; 5 # No OLD SOGDIAN NUMBER FIVE 11056 ; 5.0 ; ; 5 # No BRAHMI NUMBER FIVE @@ -1093,9 +1114,12 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE 114D5 ; 5.0 ; ; 5 # Nd TIRHUTA DIGIT FIVE 11655 ; 5.0 ; ; 5 # Nd MODI DIGIT FIVE 116C5 ; 5.0 ; ; 5 # Nd TAKRI DIGIT FIVE +116D5 ; 5.0 ; ; 5 # Nd MYANMAR PAO DIGIT FIVE +116DF ; 5.0 ; ; 5 # Nd MYANMAR EASTERN PWO KAREN DIGIT FIVE 11735 ; 5.0 ; ; 5 # Nd AHOM DIGIT FIVE 118E5 ; 5.0 ; ; 5 # Nd WARANG CITI DIGIT FIVE 11955 ; 5.0 ; ; 5 # Nd DIVES AKURU DIGIT FIVE +11BF5 ; 5.0 ; ; 5 # Nd SUNUWAR DIGIT FIVE 11C55 ; 5.0 ; ; 5 # Nd BHAIKSUKI DIGIT FIVE 11C5E ; 5.0 ; ; 5 # No BHAIKSUKI NUMBER FIVE 11D55 ; 5.0 ; ; 5 # Nd MASARAM GONDI DIGIT FIVE @@ -1139,7 +1163,7 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE 1FBF5 ; 5.0 ; ; 5 # Nd SEGMENTED DIGIT FIVE 20121 
; 5.0 ; ; 5 # Lo CJK UNIFIED IDEOGRAPH-20121 -# Total code points: 133 +# Total code points: 137 # ================================================ @@ -1214,6 +1238,7 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX 104A6 ; 6.0 ; ; 6 # Nd OSMANYA DIGIT SIX 109C5 ; 6.0 ; ; 6 # No MEROITIC CURSIVE NUMBER SIX 10D36 ; 6.0 ; ; 6 # Nd HANIFI ROHINGYA DIGIT SIX +10D46 ; 6.0 ; ; 6 # Nd GARAY DIGIT SIX 10E65 ; 6.0 ; ; 6 # No RUMI DIGIT SIX 11057 ; 6.0 ; ; 6 # No BRAHMI NUMBER SIX 1106C ; 6.0 ; ; 6 # Nd BRAHMI DIGIT SIX @@ -1226,9 +1251,12 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX 114D6 ; 6.0 ; ; 6 # Nd TIRHUTA DIGIT SIX 11656 ; 6.0 ; ; 6 # Nd MODI DIGIT SIX 116C6 ; 6.0 ; ; 6 # Nd TAKRI DIGIT SIX +116D6 ; 6.0 ; ; 6 # Nd MYANMAR PAO DIGIT SIX +116E0 ; 6.0 ; ; 6 # Nd MYANMAR EASTERN PWO KAREN DIGIT SIX 11736 ; 6.0 ; ; 6 # Nd AHOM DIGIT SIX 118E6 ; 6.0 ; ; 6 # Nd WARANG CITI DIGIT SIX 11956 ; 6.0 ; ; 6 # Nd DIVES AKURU DIGIT SIX +11BF6 ; 6.0 ; ; 6 # Nd SUNUWAR DIGIT SIX 11C56 ; 6.0 ; ; 6 # Nd BHAIKSUKI DIGIT SIX 11C5F ; 6.0 ; ; 6 # No BHAIKSUKI NUMBER SIX 11D56 ; 6.0 ; ; 6 # Nd MASARAM GONDI DIGIT SIX @@ -1267,7 +1295,7 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX 1FBF6 ; 6.0 ; ; 6 # Nd SEGMENTED DIGIT SIX 20AEA ; 6.0 ; ; 6 # Lo CJK UNIFIED IDEOGRAPH-20AEA -# Total code points: 117 +# Total code points: 121 # ================================================ @@ -1341,6 +1369,7 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN 104A7 ; 7.0 ; ; 7 # Nd OSMANYA DIGIT SEVEN 109C6 ; 7.0 ; ; 7 # No MEROITIC CURSIVE NUMBER SEVEN 10D37 ; 7.0 ; ; 7 # Nd HANIFI ROHINGYA DIGIT SEVEN +10D47 ; 7.0 ; ; 7 # Nd GARAY DIGIT SEVEN 10E66 ; 7.0 ; ; 7 # No RUMI DIGIT SEVEN 11058 ; 7.0 ; ; 7 # No BRAHMI NUMBER SEVEN 1106D ; 7.0 ; ; 7 # Nd BRAHMI DIGIT SEVEN @@ -1353,9 +1382,12 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN 114D7 ; 7.0 ; ; 7 # Nd TIRHUTA DIGIT SEVEN 11657 ; 7.0 ; ; 7 # Nd MODI DIGIT SEVEN 116C7 ; 7.0 ; ; 7 # Nd TAKRI DIGIT SEVEN +116D7 ; 7.0 ; ; 7 # Nd MYANMAR PAO DIGIT SEVEN +116E1 ; 7.0 ; 
; 7 # Nd MYANMAR EASTERN PWO KAREN DIGIT SEVEN 11737 ; 7.0 ; ; 7 # Nd AHOM DIGIT SEVEN 118E7 ; 7.0 ; ; 7 # Nd WARANG CITI DIGIT SEVEN 11957 ; 7.0 ; ; 7 # Nd DIVES AKURU DIGIT SEVEN +11BF7 ; 7.0 ; ; 7 # Nd SUNUWAR DIGIT SEVEN 11C57 ; 7.0 ; ; 7 # Nd BHAIKSUKI DIGIT SEVEN 11C60 ; 7.0 ; ; 7 # No BHAIKSUKI NUMBER SEVEN 11D57 ; 7.0 ; ; 7 # Nd MASARAM GONDI DIGIT SEVEN @@ -1393,7 +1425,7 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN 1FBF7 ; 7.0 ; ; 7 # Nd SEGMENTED DIGIT SEVEN 20001 ; 7.0 ; ; 7 # Lo CJK UNIFIED IDEOGRAPH-20001 -# Total code points: 117 +# Total code points: 121 # ================================================ @@ -1464,6 +1496,7 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT 104A8 ; 8.0 ; ; 8 # Nd OSMANYA DIGIT EIGHT 109C7 ; 8.0 ; ; 8 # No MEROITIC CURSIVE NUMBER EIGHT 10D38 ; 8.0 ; ; 8 # Nd HANIFI ROHINGYA DIGIT EIGHT +10D48 ; 8.0 ; ; 8 # Nd GARAY DIGIT EIGHT 10E67 ; 8.0 ; ; 8 # No RUMI DIGIT EIGHT 11059 ; 8.0 ; ; 8 # No BRAHMI NUMBER EIGHT 1106E ; 8.0 ; ; 8 # Nd BRAHMI DIGIT EIGHT @@ -1476,9 +1509,12 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT 114D8 ; 8.0 ; ; 8 # Nd TIRHUTA DIGIT EIGHT 11658 ; 8.0 ; ; 8 # Nd MODI DIGIT EIGHT 116C8 ; 8.0 ; ; 8 # Nd TAKRI DIGIT EIGHT +116D8 ; 8.0 ; ; 8 # Nd MYANMAR PAO DIGIT EIGHT +116E2 ; 8.0 ; ; 8 # Nd MYANMAR EASTERN PWO KAREN DIGIT EIGHT 11738 ; 8.0 ; ; 8 # Nd AHOM DIGIT EIGHT 118E8 ; 8.0 ; ; 8 # Nd WARANG CITI DIGIT EIGHT 11958 ; 8.0 ; ; 8 # Nd DIVES AKURU DIGIT EIGHT +11BF8 ; 8.0 ; ; 8 # Nd SUNUWAR DIGIT EIGHT 11C58 ; 8.0 ; ; 8 # Nd BHAIKSUKI DIGIT EIGHT 11C61 ; 8.0 ; ; 8 # No BHAIKSUKI NUMBER EIGHT 11D58 ; 8.0 ; ; 8 # Nd MASARAM GONDI DIGIT EIGHT @@ -1515,7 +1551,7 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT 1F109 ; 8.0 ; ; 8 # No DIGIT EIGHT COMMA 1FBF8 ; 8.0 ; ; 8 # Nd SEGMENTED DIGIT EIGHT -# Total code points: 112 +# Total code points: 116 # ================================================ @@ -1589,6 +1625,7 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE 104A9 ; 9.0 ; ; 9 # Nd OSMANYA DIGIT NINE 109C8 ; 
9.0 ; ; 9 # No MEROITIC CURSIVE NUMBER NINE 10D39 ; 9.0 ; ; 9 # Nd HANIFI ROHINGYA DIGIT NINE +10D49 ; 9.0 ; ; 9 # Nd GARAY DIGIT NINE 10E68 ; 9.0 ; ; 9 # No RUMI DIGIT NINE 1105A ; 9.0 ; ; 9 # No BRAHMI NUMBER NINE 1106F ; 9.0 ; ; 9 # Nd BRAHMI DIGIT NINE @@ -1601,9 +1638,12 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE 114D9 ; 9.0 ; ; 9 # Nd TIRHUTA DIGIT NINE 11659 ; 9.0 ; ; 9 # Nd MODI DIGIT NINE 116C9 ; 9.0 ; ; 9 # Nd TAKRI DIGIT NINE +116D9 ; 9.0 ; ; 9 # Nd MYANMAR PAO DIGIT NINE +116E3 ; 9.0 ; ; 9 # Nd MYANMAR EASTERN PWO KAREN DIGIT NINE 11739 ; 9.0 ; ; 9 # Nd AHOM DIGIT NINE 118E9 ; 9.0 ; ; 9 # Nd WARANG CITI DIGIT NINE 11959 ; 9.0 ; ; 9 # Nd DIVES AKURU DIGIT NINE +11BF9 ; 9.0 ; ; 9 # Nd SUNUWAR DIGIT NINE 11C59 ; 9.0 ; ; 9 # Nd BHAIKSUKI DIGIT NINE 11C62 ; 9.0 ; ; 9 # No BHAIKSUKI NUMBER NINE 11D59 ; 9.0 ; ; 9 # Nd MASARAM GONDI DIGIT NINE @@ -1641,7 +1681,7 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE 1FBF9 ; 9.0 ; ; 9 # Nd SEGMENTED DIGIT NINE 2F890 ; 9.0 ; ; 9 # Lo CJK COMPATIBILITY IDEOGRAPH-2F890 -# Total code points: 118 +# Total code points: 122 # ================================================ diff --git a/unicodetools/src/main/java/org/unicode/props/UcdLineParser.java b/unicodetools/src/main/java/org/unicode/props/UcdLineParser.java index 712d5e0c0..c17f3c326 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdLineParser.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdLineParser.java @@ -111,6 +111,11 @@ public boolean hasNext() { return false; } line = line2 = rawLines.next(); + if (line.startsWith("<<<<<<<") + || line.startsWith("=======") + || line.startsWith(">>>>>>>")) { + line2 = ""; + } ++stats.lineCount; final int hashPos = line2.indexOf('#'); if (hashPos >= 0) { diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java index c5eb7e092..7bfd6bb67 100644 --- 
a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java @@ -283,6 +283,7 @@ public enum Block_Values implements Named { Ethiopic_Extended_A("Ethiopic_Ext_A"), Ethiopic_Extended_B("Ethiopic_Ext_B"), Ethiopic_Supplement("Ethiopic_Sup"), + Garay("Garay"), Geometric_Shapes("Geometric_Shapes"), Geometric_Shapes_Extended("Geometric_Shapes_Ext"), Georgian("Georgian"), @@ -393,6 +394,7 @@ public enum Block_Values implements Named { Myanmar("Myanmar"), Myanmar_Extended_A("Myanmar_Ext_A"), Myanmar_Extended_B("Myanmar_Ext_B"), + Myanmar_Extended_C("Myanmar_Ext_C"), Nabataean("Nabataean"), Nag_Mundari("Nag_Mundari"), Nandinagari("Nandinagari"), @@ -451,6 +453,7 @@ public enum Block_Values implements Named { Specials("Specials"), Sundanese("Sundanese"), Sundanese_Supplement("Sundanese_Sup"), + Sunuwar("Sunuwar"), Supplemental_Arrows_A("Sup_Arrows_A"), Supplemental_Arrows_B("Sup_Arrows_B"), Supplemental_Arrows_C("Sup_Arrows_C"), @@ -464,6 +467,7 @@ public enum Block_Values implements Named { Syloti_Nagri("Syloti_Nagri"), Symbols_And_Pictographs_Extended_A("Symbols_And_Pictographs_Ext_A"), Symbols_For_Legacy_Computing("Symbols_For_Legacy_Computing"), + Symbols_For_Legacy_Computing_Supplement("Symbols_For_Legacy_Computing_Sup"), Syriac("Syriac"), Syriac_Supplement("Syriac_Sup"), Tagalog("Tagalog"), @@ -486,6 +490,7 @@ public enum Block_Values implements Named { Tibetan("Tibetan"), Tifinagh("Tifinagh"), Tirhuta("Tirhuta"), + Todhri("Todhri"), Toto("Toto"), Transport_And_Map_Symbols("Transport_And_Map"), Unified_Canadian_Aboriginal_Syllabics("UCAS", "Canadian_Syllabics"), @@ -1736,6 +1741,7 @@ public enum Script_Values implements Named { Elbasan("Elba"), Elymaic("Elym"), Ethiopic("Ethi"), + Garay("Gara"), Georgian("Geor"), Glagolitic("Glag"), Gunjala_Gondi("Gong"), @@ -1833,6 +1839,7 @@ public enum Script_Values implements Named { Sora_Sompeng("Sora"), Soyombo("Soyo"), Sundanese("Sund"), + 
Sunuwar("Sunu"), Syloti_Nagri("Sylo"), Syriac("Syrc"), Tagbanwa("Tagb"), @@ -1850,6 +1857,7 @@ public enum Script_Values implements Named { Tibetan("Tibt"), Tirhuta("Tirh"), Tangsa("Tnsa"), + Todhri("Todr"), Toto("Toto"), Ugaritic("Ugar"), Vai("Vaii"), diff --git a/unicodetools/src/main/java/org/unicode/text/UCA/ReorderCodes.java b/unicodetools/src/main/java/org/unicode/text/UCA/ReorderCodes.java index b46b53c81..2d733a054 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCA/ReorderCodes.java +++ b/unicodetools/src/main/java/org/unicode/text/UCA/ReorderCodes.java @@ -139,9 +139,37 @@ public static final String getSampleCharacter(int reorderCode) { // TODO: // - Remove scripts supported by ICU4J UScript and CLDR ScriptMetadata. // - Add scripts not yet supported there. + // + // See https://www.unicode.org/alloc/Pipeline.html + // and https://cldr.unicode.org/development/updating-codes/updating-script-metadata switch (reorderCode) { // case UCD_Types.Old_Hungarian: // return "𐲡"; + // Approved for Unicode 16: + case UCD_Types.Garay: + return "\uD803\uDD5D"; + case UCD_Types.Gurung_Khema: + return "\uD818\uDD1C"; + case UCD_Types.Kirat_Rai: + return "\uD81B\uDD45"; + case UCD_Types.Ol_Onal: + return "\uD839\uDDD0"; + case UCD_Types.Sunuwar: + return "\uD806\uDFC4"; + case UCD_Types.Todhri: + return "\uD801\uDDC2"; + case UCD_Types.Tulu_Tigalari: + return "\uD804\uDF92"; + + // Provisionally assigned so far: + case UCD_Types.Chisoi: + return "\uD81B\uDD93"; + case UCD_Types.Sidetic: + return "\uD802\uDD50"; + case UCD_Types.Tai_Yo: + return "\uD839\uDED5"; + case UCD_Types.Tolong_Siki: + return "\uD807\uDDC6"; default: throw new UnsupportedOperationException("unknown reorderCode " + reorderCode); } diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java index e1ff508ad..759361106 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java 
+++ b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java @@ -67,6 +67,7 @@ static class Format { Map> fileToPropertySet = new TreeMap>(); Map fileToComments = new TreeMap(); Map fileToDirectory = new TreeMap(); + Map> propertyToOrderedValues = new TreeMap>(); Map> propertyToValueToComments = new TreeMap>(); Map hackMap = new HashMap(); @@ -110,6 +111,12 @@ public static class PrintStyle { // Unicode 15.1 and later LineBreak.txt and EastAsianWidth.txt, which are all generated // in that format by some other tool. boolean kenFile = false; + // Whether the file should be produced in the style of IndicPositionalCategory.txt and + // IndicSyllabicCategory.txt, which are both generated in that format by some other + // tool. + boolean roozbehFile = false; + // Whether to separate values of enumerated properties using a line of equal signs. + boolean separateValues = true; boolean hackValues = false; boolean mergeRanges = true; String nameStyle = "none"; @@ -138,6 +145,10 @@ String parse(String options) { interleaveValues = true; } else if (piece.equals("kenFile")) { kenFile = true; + } else if (piece.equals("roozbehFile")) { + roozbehFile = true; + } else if (piece.startsWith("separateValues=")) { + separateValues = afterEqualsBoolean(piece); } else if (piece.equals("hackValues")) { hackValues = true; } else if (piece.equals("sortNumeric")) { @@ -301,6 +312,10 @@ private void build() { } line = line.trim(); if (line.length() == 0) { + if (comments.length() != 0) { + // Preserve blank lines between comments. 
+ comments += "\n"; + } continue; } if (DEBUG) { @@ -321,6 +336,7 @@ private void build() { comments += line; } else { // end of comments, roll up + comments = comments.trim(); if (comments.length() != 0) { if (property != null) { addValueComments(property, value, comments); @@ -350,6 +366,10 @@ private void build() { value = ""; } else if (line.startsWith("Value:")) { value = lineValue; + final var values = + propertyToOrderedValues.computeIfAbsent( + property, k -> new ArrayList()); + values.add(value); } else if (line.startsWith("HackName:")) { final String regularItem = Utility.getUnskeleton(lineValue, true); hackMap.put(regularItem, lineValue); @@ -1152,6 +1172,9 @@ public static void generatePropertyFile(String filename) throws IOException { filename, Format.theFormat.getPrintStyle(name)); if (!ps.kenFile) { pwProp.println(); + if (!ps.separateValues) { + pwProp.println(); + } pwProp.println(SEPARATOR); } final String propComment = Format.theFormat.getValueComments(name, ""); @@ -1161,7 +1184,11 @@ public static void generatePropertyFile(String filename) throws IOException { pwProp.println(propComment); } else if (!prop.isType(UnicodeProperty.BINARY_MASK)) { pwProp.println(); - pwProp.println("# Property:\t" + name); + if (ps.roozbehFile) { + pwProp.println("# Property: " + name); + } else { + pwProp.println("# Property:\t" + name); + } } } @@ -1182,9 +1209,12 @@ public static void generatePropertyFile(String filename) throws IOException { v = v + " (" + v2 + ")"; } } - pwProp.println(); + pwProp.println(ps.roozbehFile ? "#" : ""); pwProp.println("# All code points not explicitly listed for " + prop.getName()); - pwProp.println("# have the value " + v + "."); + pwProp.println( + "# have the value " + + v + + (ps.roozbehFile && v.equals("NA") ? " (not applicable)." 
: ".")); } if (!ps.interleaveValues && prop.isType(UnicodeProperty.BINARY_MASK)) { @@ -1254,6 +1284,21 @@ private static void writeEnumeratedValues( temp2.addAll(aliases); aliases = temp2; } + if (ps.roozbehFile) { + aliases.removeIf(alias -> UnicodeProperty.compareNames(alias, ps.skipValue) == 0); + if (!Format.theFormat + .propertyToOrderedValues + .get(prop.getName()) + .containsAll(aliases)) { + final TreeSet missingAliases = new TreeSet(aliases); + missingAliases.removeAll( + Format.theFormat.propertyToOrderedValues.get(prop.getName())); + throw new IllegalArgumentException( + "All values must be listed when using roozbehFile; missing " + + missingAliases); + } + aliases = Format.theFormat.propertyToOrderedValues.get(prop.getName()); + } if (ps.sortNumeric) { if (DEBUG) { System.out.println("Reordering"); @@ -1284,7 +1329,7 @@ private static void writeEnumeratedValues( final String missing = ps.skipUnassigned != null ? ps.skipUnassigned : ps.skipValue; if (missing != null && !missing.equals(UCD_Names.NO)) { - pw.println(); + pw.println(ps.roozbehFile ? 
"#" : ""); final String propName = bf.getPropName(); // if (propName == null) propName = ""; // else if (propName.length() != 0) propName = propName + "; "; @@ -1302,6 +1347,10 @@ private static void writeEnumeratedValues( writeEnumeratedMissingValues(pw, overallDefault, defaultLbValues); } } + if (!ps.separateValues) { + pw.println(); + pw.println(SEPARATOR.replace('=', '-')); + } for (final Iterator it = aliases.iterator(); it.hasNext(); ) { final String value = it.next(); if (DEBUG) { @@ -1416,9 +1465,13 @@ private static void writeEnumeratedValues( if (!prop.isType(UnicodeProperty.BINARY_MASK)) { pw.println(); - pw.println(SEPARATOR); + if (ps.separateValues) { + pw.println(SEPARATOR); + } if (nonLongValue) { - pw.println(); + if (ps.separateValues) { + pw.println(); + } pw.println("# " + prop.getName() + "=" + value); } } @@ -1442,6 +1495,11 @@ private static void writeEnumeratedValues( pw.println(); // if (s.size() != 0) bf.setMergeRanges(ps.mergeRanges); + bf.setShowTotal(!ps.roozbehFile); + if (ps.roozbehFile) { + bf.setRangeBreakSource( + ToolUnicodePropertySource.make(Default.ucdVersion()).getProperty("Block")); + } bf.showSetNames(pw, s); if (DEBUG) { System.out.println(bf.showSetNames(s)); diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java index c48269675..0f12b0ffc 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java @@ -414,19 +414,19 @@ public final class UCD_Names implements UCD_Types { // Unicode 15 "Kawi", "Nag_Mundari", - // A future version of Unicode - "Sunuwar", - "Tulu_Tigalari", - "Kirat_Rai", - "Todhri", + // Unicode 16 "Garay", "Gurung_Khema", + "Kirat_Rai", "Ol_Onal", + "Sunuwar", + "Todhri", + "Tulu_Tigalari", // Provisionally assigned - "Sidetic", "Chisoi", - "Tolong_Siki", + "Sidetic", "Tai_Yo", + "Tolong_Siki", }; public static final Relation EXTRA_SCRIPT 
= @@ -611,19 +611,19 @@ public final class UCD_Names implements UCD_Types { // Unicode 15 "Kawi", "Nagm", - // A future version of Unicode - "Qaba", - "Qabb", - "Qabc", - "Qabd", - "Qabe", - "Qabf", - "Qabg", + // Unicode 16 + "Gara", + "Gukh", + "Krai", + "Onao", + "Sunu", + "Todr", + "Tutg", // Provisionally assigned - "Qabh", - "Qabi", - "Qabj", - "Qabk", + "Chis", + "Sidt", + "Tayo", + "Tols", }; static final String[] SHORT_AGE = { diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java index 6f5a76340..972753c37 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java @@ -599,20 +599,20 @@ public interface UCD_Types { // Unicode 15 Kawi = 164, Nag_Mundari = 165, - // A future version of Unicode - Sunuwar = 166, - Tulu_Tigalari = 167, + // Unicode 16 + Garay = 166, + Gurung_Khema = 167, Kirat_Rai = 168, - Todhri = 169, - Garay = 170, - Gurung_Khema = 171, - Ol_Onal = 172, + Ol_Onal = 169, + Sunuwar = 170, + Todhri = 171, + Tulu_Tigalari = 172, // Provisionally assigned - Sidetic = 173, - Chisoi = 174, - Tolong_Siki = 175, - Tai_Yo = 176, - LIMIT_SCRIPT = Tai_Yo + 1; + Chisoi = 173, + Sidetic = 174, + Tai_Yo = 175, + Tolong_Siki = 176, + LIMIT_SCRIPT = Tolong_Siki + 1; // Bidi_Paired_Bracket_Type public static final byte BPT_N = 0, BPT_O = 1, BPT_C = 2, LIMIT_BPT = 3; diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt index 702c46ca5..db8ebd7b8 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt @@ -908,6 +908,376 @@ Format: kenFile skipValue=Rotated # Property: VerticalOrientation +File: IndicPositionalCategory +# +# This file defines the following property: +# +# 
Indic_Positional_Category enumerated property +# +# Scope: This property is aimed at the problem of +# the specification of syllabic structure for Indic scripts. +# Because dependent vowels (matras), visible viramas, and other +# characters are placed in notional slots around the consonant (or +# consonant cluster) core of an Indic syllable, there may be +# cooccurrence constraints or other interactions. Also, it may be +# desirable, in cases where more than one such character may occur in +# sequence, as for example, in a top slot and a bottom slot, to +# specify preferred orders for spelling. As such, this property +# is designed primarily to supplement the Indic_Syllabic_Category +# property. +# +# In addition to combining marks associated with Indic scripts, the +# Indic_Positional_Category has non-trivial values for special signs +# associated with Indic_Syllabic_Category=Consonant_Prefixed +# or Indic_Syllabic_Category=Consonant_Preceding_Repha. Those signs +# have General_Category=Lo, rather than being combining marks. +# They occur in initial position in syllabic structure. However, when +# rendered, they appear as marks positioned with respect to another +# base letter (usually above it). Hence, having an explicit value for +# Indic_Positional_Category for those signs can be helpful. +# +# Note that this property is *not* intended as +# a prescriptive property regarding display or font design, +# for a number of reasons. Good font design requires information +# that is outside the context of a character encoding standard, +# and is best handled in other venues. For Indic dependent +# vowels and similar characters, in particular: +# +# 1. Matra placement may vary somewhat based on typeface design. +# 2. Matra placement, even within a single script, may vary +# somewhat according to historic period or local conventions. +# 3. Matra placement may be changed by explicit orthographic reform +# decisions. +# 4. 
Matras may ligate in various ways with a consonant (or even +# other elements of a syllable) instead of occurring in a +# discrete location. +# 5. Matra display may be contextually determined. This is +# notable, for example, in the Tamil script, where the shape +# and placement of -u and -uu vowels depends strongly on +# which consonant they adjoin. +# +# Format: +# Field 0 Unicode code point value or range of code point values +# Field 1 Indic_Positional_Category property value +# +# Field 1 is followed by a comment field, starting with the number sign '#', +# which shows the General_Category property value, the Unicode character name +# or names, and, in lines with ranges of code points, the code point count in +# square brackets. +# +# The scripts assessed as containing dependent vowels or similar characters +# in the structural sense used for the Indic_Positional_Category are the +# following: +# +# Ahom, Balinese, Batak, Bengali, Bhaiksuki, Brahmi, Buginese, Buhid, +# Chakma, Cham, Devanagari, Dives Akuru, Dogra, Grantha, Gujarati, +# Gunjala Gondi, Gurmukhi, Hanunoo, Javanese, Kaithi, Kannada, Kawi, +# Kayah Li, Kharoshthi, Khmer, Khojki, Khudawadi, Lao, Lepcha, Limbu, +# Makasar, Malayalam, Marchen, Masaram Gondi, Meetei Mayek, Modi, +# Myanmar, Nandinagari, Newa, New Tai Lue, Oriya, Rejang, Saurashtra, +# Sharada, Siddham, Sinhala, Soyombo, Sundanese, Syloti Nagri, +# Tagalog, Tagbanwa, Tai Tham, Tai Viet, Takri, Tamil, Telugu, Thai, +# Tibetan, Tirhuta, and Zanabazar Square. +# +# All characters for all other scripts not in that list +# take the default value for this property. +# +# See IndicSyllabicCategory.txt for a slightly more extended +# list of Indic scripts, including those which do not have +# positional characters. Currently, those additional +# Indic scripts without positional characters are +# Multani, Phags-pa, and Tai Le. +# +# Notes: +# +# 1. 
The following characters are all assigned the positional category Right, +# but may have different positions in some cases: +# * U+0BC1 TAMIL VOWEL SIGN U and U+0BC2 TAMIL VOWEL SIGN UU have +# contextually variable placement in Tamil. +# * U+0D41 MALAYALAM VOWEL SIGN U and U+0D42 MALAYALAM VOWEL SIGN UU form +# complex ligatures with consonants in older Malayalam orthography. +# * U+11341 GRANTHA VOWEL SIGN U and U+11342 GRANTHA VOWEL SIGN UU have +# contextually variable placement in Grantha. +# * U+11440 NEWA VOWEL SIGN O and U+11441 NEWA VOWEL SIGN AU have contextually +# variable placement in Newa. +# +# 2. The following characters are all assigned the positional category Top, +# but may have different positions in some cases: +# * U+1143E NEWA VOWEL SIGN E and U+1143F NEWA VOWEL SIGN AI have contextually +# variable placement in Newa. +# +# 3. The following characters are all assigned the positional category Bottom, +# but may have different positions in some cases: +# * U+102F MYANMAR VOWEL SIGN U and U+1030 MYANMAR VOWEL SIGN UU have +# contextually variable placement in Myanmar. +# * U+1A69 TAI THAM VOWEL SIGN U and U+1A6A TAI THAM VOWEL SIGN UU have +# contextually variable placement in Tai Tham. +# +# 4. The following character is assigned the positional category Left, but +# may have different positions in different styles: +# * U+119D2 NANDINAGARI VOWEL SIGN I has stylistically variable placement +# in Nandinagari. +Property: Indic_Positional_Category +Format: roozbehFile separateValues=false valueStyle=short skipValue=NA +Value: Right +Value: Left +Value: Visual_Order_Left + +# These are dependent vowels that occur to the left of the consonant +# letter in a syllable, but which occur in scripts using the visual order +# model, instead of the logical order model. Because of the different +# model, these left-side vowels occur first in the backing store (before +# the consonant letter) and are not reordered during text rendering. 
+# +# [Derivation: Logical_Order_Exception=Yes] +Value: Left_And_Right +Value: Top +Value: Bottom +Value: Top_And_Bottom +Value: Top_And_Right +Value: Top_And_Left +Value: Top_And_Left_And_Right +Value: Bottom_And_Right +Value: Bottom_And_Left +Value: Top_And_Bottom_And_Right +Value: Top_And_Bottom_And_Left +Value: Overstruck + +File: IndicSyllabicCategory +# +# This file defines the following property: +# +# Indic_Syllabic_Category enumerated property +# +# Scope: This property is aimed at two general problem +# areas involving the analysis and processing of Indic scripts: +# +# 1. Specification of syllabic structure. +# 2. Specification of segmentation rules. +# +# Both of these problem areas may benefit from having defined subtypes +# of Indic script characters which are relevant to how Indic +# syllables (or aksaras) are constructed. Note that rules for +# syllabic structure in Indic scripts may differ significantly +# from how phonological syllables are defined. +# +# Format: +# Field 0 Unicode code point value or range of code point values +# Field 1 Indic_Syllabic_Category property value +# +# Field 1 is followed by a comment field, starting with the number sign '#', +# which shows the General_Category property value, the Unicode character name +# or names, and, in lines with ranges of code points, the code point count in +# square brackets. 
+# +# The scripts assessed as Indic in the structural sense used for the +# Indic_Syllabic_Category are the following: +# +# Ahom, Balinese, Batak, Bengali, Bhaiksuki, Brahmi, Buginese, Buhid, +# Chakma, Cham, Devanagari, Dives Akuru, Dogra, Grantha, Gujarati, +# Gunjala Gondi, Gurmukhi, Hanunoo, Javanese, Kaithi, Kannada, Kawi, +# Kayah Li, Kharoshthi, Khmer, Khojki, Khudawadi, Lao, Lepcha, Limbu, +# Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meetei Mayek, +# Modi, Multani, Myanmar, Nandinagari, Newa, New Tai Lue, Oriya, +# Phags-pa, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Soyombo, +# Sundanese, Syloti Nagri, Tagalog, Tagbanwa, Tai Le, Tai Tham, +# Tai Viet, Takri, Tamil, Telugu, Thai, Tibetan, Tirhuta, and +# Zanabazar Square. +# +# All characters for all other scripts not in that list +# take the default value for this property, unless they +# are individually listed in this data file. +# +Property: Indic_Syllabic_Category +Format: roozbehFile valueStyle=short skipValue=Other +Value: Bindu +# Bindu/Anusvara (nasalization or -n) + +# [Not derivable] +Value: Visarga +# Visarga (-h) +# Excludes letters for jihvamuliya and upadhmaniya, which are +# related, but structured somewhat differently. + +# [Not derivable] +Value: Avagraha +# Avagraha (elision of initial a- in sandhi) + +# [Not derivable] +Value: Nukta +# Nukta (diacritic for borrowed consonants or other consonant +# modifications). Note that while the resulting sound is typically a +# consonant, the base letter a nukta follows may be an independent +# vowel. For example, <U+0A85, U+0ABC> is used to transcribe ARABIC LETTER +# AIN. + +# [Not derivable] +Value: Virama +# Virama (killing of inherent vowel in consonant sequence +# or consonant stacker) +# Only includes characters that can act both as visible killer viramas +# and consonant stackers. Separate property values exist for characters +# that can only act as pure killers or only as consonant stackers. 
+ +# [Derivation: (ccc=9) - (InSC=Pure_Killer) - (InSC=Invisible_Stacker) +# - (InSC=Number_Joiner) - 2D7F] +Value: Pure_Killer +# Pure killer (killing of inherent vowel in consonant sequence, +# with no consonant stacking behavior) + +# [Not derivable] +Value: Invisible_Stacker +# Invisible stacker (invisible consonant stacker virama). +# +# Note that in some scripts, such as Kharoshthi and Masaram Gondi, an invisible +# stacker may have a second function, changing the shape and/or location of the +# consonant preceding it, even when there is no consonant following the +# invisible stacker. + +# [Not derivable] +Value: Vowel_Independent +# Independent Vowels (contrasted with matras) + +# [Not derivable] +Value: Vowel_Dependent +# Dependent Vowels (contrasted with independent vowels and/or with +# complex placement). Known as matras in Indic scripts. Also +# includes vowel modifiers that follow dependent (and sometimes +# independent) vowels. + +# [Not derivable] +Value: Vowel +# (Other) Vowels (reanalyzed as ordinary alphabetic letters or marks) + +# [Not derivable] +Value: Consonant_Placeholder +# Consonant Placeholder +# This includes generic placeholders used for +# Indic script layout (NBSP and dotted circle), as well as a few script- +# specific vowel-holder characters which are not technically +# consonants, but serve instead as bases for placement of vowel marks. + +# [Not derivable] +Value: Consonant +# Consonant (ordinary abugida consonants, with inherent vowels) + +# [Not derivable] +Value: Consonant_Dead +# Dead Consonant (special consonant with killed vowel) + +# [Not derivable] +Value: Consonant_With_Stacker +# Consonants that may make stacked ligatures with the next consonant +# without the use of a virama + +# [Not derivable] +Value: Consonant_Prefixed +# Cluster-initial consonants + +# [Not derivable] +Value: Consonant_Preceding_Repha +# Repha Form of RA (reanalyzed in some scripts), when preceding the main +# consonant. 
+ +# [Not derivable] +Value: Consonant_Initial_Postfixed +# Consonants that succeed the main consonant in character sequences, but are +# pronounced before it. + +# [Not derivable] +Value: Consonant_Succeeding_Repha +# Repha Form of RA (reanalyzed in some scripts), when succeeding the main +# consonant. + +# [Not derivable] +Value: Consonant_Subjoined +# Subjoined Consonant (C2 form subtending a base consonant in Tibetan, etc.) + +# [Not derivable] +Value: Consonant_Medial +# Medial Consonant (medial liquid, occurring in clusters) + +# [Not derivable] +Value: Consonant_Final +# Final Consonant (special final forms which do not take vowels) + +# [Not derivable] +Value: Consonant_Head_Letter +# Head Letter (Tibetan) + +# [Not derivable] +Value: Modifying_Letter +# Reanalyzed letters not participating in the abugida structure, but +# serving to modify the sound of an adjacent vowel or consonant. +# Note that this is not the same as General_Category=Modifier_Letter. + +# [Not derivable] +Value: Tone_Letter +# Tone Letter (spacing lexical tone mark with status as a letter) + +# [Not derivable] +Value: Tone_Mark +# Tone Mark (nonspacing or spacing lexical tone mark) + +# [Not derivable] +Value: Gemination_Mark +# Gemination Mark (doubling of the preceding or following consonant) +# +# U+0A71 GURMUKHI ADDAK precedes the consonant it geminates, while the +# others follow the consonant they geminate. 
+ +# [Not derivable] +Value: Cantillation_Mark +# Cantillation Mark (recitation marks, such as svara markers for the Samaveda) + +# [Not derivable] +Value: Register_Shifter +# Register Shifter (shifts register for consonants, akin to a tone mark) + +# [Not derivable] +Value: Syllable_Modifier +# Syllable Modifier (miscellaneous combining characters that modify +# something in the orthographic syllable they succeed or appear in) + +# [Not derivable] +Value: Consonant_Killer +# Consonant Killer (signifies that the previous consonant or consonants are +# not pronounced) + +# [Not derivable] +Value: Non_Joiner +# Non_Joiner (Zero Width Non-Joiner) + +# [Not derivable] +Value: Joiner +# Joiner (Zero Width Joiner) + +# [Not derivable] +Value: Number_Joiner +# Number_Joiner (forms ligatures between numbers for multiplication) + +# [Not derivable] +Value: Number +# Number (can be used as vowel-holders like consonant placeholders) +# Note: A number may even hold subjoined consonants which may in turn +# have been formed using a virama or a stacker, e.g. the sequence +# <U+1A93, U+1A60, U+1A34> where TAI THAM LETTER LOW TA is subjoined to +# TAI THAM THAM DIGIT THREE using an invisible stacker. + +# [Not derivable] +Value: Brahmi_Joining_Number +# Brahmi Joining Number (may be joined by a Number_Joiner of the same +# script, e.g. in Brahmi) +# +# Note: These are different from Numbers, in the way that there is no known +# evidence of Brahmi Joining Numbers taking vowels or subjoined consonants. +# Until such evidence is found, implementations may assume that Brahmi +# Joining Numbers only participate in shaping with other Brahmi Joining +# Numbers. 
+ +# [Not derivable] + File: UnicodeData Property: SPECIAL diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt index 32a6ce8cc..d41ec1c0d 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt @@ -106,6 +106,7 @@ Ethiopic_Ext_A ; Ethiopic_Extended_A Ethiopic_Ext_B ; Ethiopic_Extended_B Ethiopic_Sup ; Ethiopic_Supplement Punctuation ; General_Punctuation +Garay ; Garay Geometric_Shapes ; Geometric_Shapes Geometric_Shapes_Ext ; Geometric_Shapes_Extended Georgian ; Georgian @@ -214,6 +215,7 @@ Music ; Musical_Symbols Myanmar ; Myanmar Myanmar_Ext_A ; Myanmar_Extended_A Myanmar_Ext_B ; Myanmar_Extended_B +Myanmar_Ext_C ; Myanmar_Extended_C Nabataean ; Nabataean Nag_Mundari ; Nag_Mundari Nandinagari ; Nandinagari @@ -272,6 +274,7 @@ Soyombo ; Soyombo Specials ; Specials Sundanese ; Sundanese Sundanese_Sup ; Sundanese_Supplement +Sunuwar ; Sunuwar Super_And_Sub ; Superscripts_And_Subscripts Sup_Arrows_A ; Supplemental_Arrows_A Sup_Arrows_B ; Supplemental_Arrows_B @@ -285,6 +288,7 @@ Sutton_SignWriting ; Sutton_SignWriting Syloti_Nagri ; Syloti_Nagri Symbols_And_Pictographs_Ext_A ; Symbols_And_Pictographs_Extended_A Symbols_For_Legacy_Computing ; Symbols_For_Legacy_Computing +Symbols_For_Legacy_Computing_Sup ; Symbols_For_Legacy_Computing_Supplement Syriac ; Syriac Syriac_Sup ; Syriac_Supplement Tagalog ; Tagalog @@ -307,6 +311,7 @@ Thai ; Thai Tibetan ; Tibetan Tifinagh ; Tifinagh Tirhuta ; Tirhuta +Todhri ; Todhri Toto ; Toto Transport_And_Map ; Transport_And_Map_Symbols Ugaritic ; Ugaritic diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index 8cb202e68..8ee8762b3 100644 --- 
a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -530,7 +530,10 @@ Show [\u20b9] # exceptions. Should such exceptions arise, they can be added to the definition of # $nonAlphabeticBindus to avoid a failure on this test. Let $nonAlphabeticBindus = [] -[\p{InSc=Bindu} - $nonAlphabeticBindus - \p{Alphabetic}] = [] +[\p{InSc=Bindu} - \p{Alphabetic}] = $nonAlphabeticBindus + +Let $nonAlphabeticDependentVowels = [\N{ORIYA SIGN OVERLINE}\N{THAI CHARACTER MAITAIKHU}\N{LIMBU SIGN KEMPHRENG}\N{SHARADA VOWEL MODIFIER MARK}\N{SHARADA EXTRA SHORT VOWEL MARK}] +[\p{InSC=Vowel_Dependent} - \p{Alphabetic}] = $nonAlphabeticDependentVowels ########################## # LineBreak property