diff --git a/.github/workflows/cli-build-instructions.yml b/.github/workflows/cli-build-instructions.yml
index 3fd9e8e56b..24d4dce1cc 100644
--- a/.github/workflows/cli-build-instructions.yml
+++ b/.github/workflows/cli-build-instructions.yml
@@ -81,8 +81,14 @@ jobs:
run: |
mkdir -p Generated/BIN
- - name: Run command - Build and Test
- run: MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml package -DCLDR_DIR=$(cd ../cldr ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd Generated; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
+ # Since these are just examples to smoke-test the in-source build process,
+ # let’s not run the whole build and test suite, which is quite slow (6 min
+ # 26 s as of this writing). Just run the invariant tests and smoke-test
+ # MakeUnicodeFiles. This job deliberately does not check that
+ # MakeUnicodeFiles leaves the UCD files unchanged; that check is done by
+ # the out-of-source ucd-and-smoke-tests job.
+ - name: Run invariant tests
+ run: MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants -DfailIfNoTests=false -DCLDR_DIR=$(cd ../cldr ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd Generated; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -91,14 +97,15 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- out-of-source-build:
- name: Out-of-source Instructions
+
+ # Out-of-source build.
+ ucd-and-smoke-tests:
+ name: Check UCD consistency, invariants, smoke-test generators
runs-on: ubuntu-latest
steps:
- name: Checkout Unicode Tools
uses: actions/checkout@v3
with:
- repository: unicode-org/unicodetools
path: unicodetools/mine/src
- name: Get the CLDR_REF from pom.xml
id: cldr_ref
@@ -136,6 +143,30 @@ jobs:
run: |
mkdir -p unicodetools/mine/Generated/BIN
+ - name: Run command - Make Unicode Files
+ run: |
+ cd unicodetools/mine/src
+ mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCD.Main" -Dexec.args="version $CURRENT_UVERSION build MakeUnicodeFiles" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+ - name: Check that UCD files are consistent
+ run: |
+ cd unicodetools/mine/src
+ ./py/copygenerateducd.py --out-of-source -y
+ git diff --compact-summary --exit-code || {
+ git diff --compact-summary |
+ awk '{
+ if (previous) {
+ print "::error file="previous",title=File must be regenerated::Run org.unicode.text.UCD.Main build MakeUnicodeFiles and copy any changed files to unicodetools/data/ucd/dev."
+ }
+ previous=$1
+ }'
+ exit 1
+ }
+
+ # Only test once we know the UCD is internally consistent.
+ # MakeUnicodeFiles is much faster than this anyway.
- name: Run command - Build and Test
run: |
cd unicodetools/mine/src
@@ -151,13 +182,6 @@ jobs:
path: |
unicodetools/mine/Generated/UnicodeTestResults.*
- - name: Run command - Make Unicode Files
- run: |
- cd unicodetools/mine/src
- mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCD.Main" -Dexec.args="version $CURRENT_UVERSION build MakeUnicodeFiles" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
- env:
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
# https://github.com/unicode-org/unicodetools/blob/main/docs/emoji/aac.md#aacorderjava
- name: Run command - AAC Order
run: |
@@ -166,18 +190,6 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- # https://github.com/unicode-org/unicodetools/blob/main/docs/uca/index.md#tools--tests
- # Note: Not running desuffixucd.py in UCA jobs because no version numbers detected in data file names
- - name: Run command - UCA - collation validity log
- run: |
- cd unicodetools/mine/src
- # invoke main() in class ...UCA.Main
- mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCA.Main" -Dexec.args="writeCollationValidityLog ICU" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
- # check for output file
- compgen -G "../Generated/UCA/*/CheckCollationValidity.html"
- env:
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
# https://github.com/unicode-org/unicodetools/blob/main/docs/idna.md
- name: Run command - IDNA
run: |
@@ -252,3 +264,61 @@ jobs:
mvn -s .github/workflows/mvn-settings.xml -Dexec.mainClass="org.unicode.propstest.CheckProperties" -Dexec.classpathScope=test test-compile -Dexec.args="COMPARE ALL $PREVIOUS_UVERSION" compile exec:java -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+ # Out-of-source build.
+ uca:
+ name: Check UCA data
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout Unicode Tools
+ uses: actions/checkout@v3
+ with:
+ # No `repository:` pin, like ucd-and-smoke-tests: check out the repository and ref that triggered this workflow (fork or PR), not upstream's default branch.
+ path: unicodetools/mine/src
+ - name: Get the CLDR_REF from pom.xml
+ id: cldr_ref
+ run: echo "CLDR_REF="$(mvn --file unicodetools/mine/src/pom.xml help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d- -f3) >> $GITHUB_OUTPUT && cat ${GITHUB_OUTPUT}
+ - name: Verify CLDR checkout ref
+ run: echo CLDR_REF="${{ steps.cldr_ref.outputs.CLDR_REF }}" && [ "${{ steps.cldr_ref.outputs.CLDR_REF }}x" != "x" ] # fail if empty
+ - name: Cache CLDR repository
+ uses: actions/cache@v3
+ with:
+ path: cldr/mine/src
+ key: cldr-${{ steps.cldr_ref.outputs.CLDR_REF }}
+ restore-keys: |
+ cldr
+ - name: Check out CLDR
+ uses: actions/checkout@v3
+ with:
+ repository: unicode-org/cldr
+ path: cldr/mine/src
+ ref: main
+ fetch-depth: 0
+ - name: Switch CLDR to CLDR_REF
+ run: cd cldr/mine/src && git fetch && git checkout ${{ steps.cldr_ref.outputs.CLDR_REF }}
+ - name: Set up JDK 11
+ uses: actions/setup-java@v1
+ with:
+ java-version: 11
+ - name: Cache local Maven repository
+ uses: actions/cache@v3 # same major version as the CLDR cache step in this job; actions/cache v2 has been retired
+ with:
+ path: ~/.m2/repository
+ key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
+ restore-keys: |
+ ${{ runner.os }}-maven-
+ - name: Set up out-of-source output dir
+ run: |
+ mkdir -p unicodetools/mine/Generated/BIN
+
+ # https://github.com/unicode-org/unicodetools/blob/main/docs/uca/index.md#tools--tests
+ # Note: Not running desuffixucd.py in UCA jobs because no version numbers detected in data file names
+ - name: Run command - UCA - collation validity log
+ run: |
+ cd unicodetools/mine/src
+ # invoke main() in class ...UCA.Main
+ mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCA.Main" -Dexec.args="writeCollationValidityLog ICU" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION
+ # check for output file
+ compgen -G "../Generated/UCA/*/CheckCollationValidity.html"
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/UnicodeJsps/Dockerfile b/UnicodeJsps/Dockerfile
index 1c1fdc76fd..6d68d359c6 100644
--- a/UnicodeJsps/Dockerfile
+++ b/UnicodeJsps/Dockerfile
@@ -3,7 +3,7 @@ FROM alpine as cbuild
WORKDIR /build
RUN apk add --update wget make gcc musl-dev
ARG CPATH=https://www.unicode.org/Public/PROGRAMS/BidiReferenceC/
-ARG CVERSION=15.0.0
+ARG CVERSION=15.1.0
RUN wget -np -nv --reject-regex='.*\.(lib|exe)$' --cut-dirs=4 -nH -r ${CPATH}${CVERSION}/
RUN cd source && gcc -I ../include/ -static -Os -o3 -o bidiref1 bidiref1.c brutils.c brtest.c brtable.c brrule.c
RUN ls -lh /build/source/bidiref1 && (/build/source/bidiref1 || true)
diff --git a/UnicodeJsps/jetty.d/ROOT/robots.txt b/UnicodeJsps/jetty.d/ROOT/robots.txt
new file mode 100644
index 0000000000..a40ff93be4
--- /dev/null
+++ b/UnicodeJsps/jetty.d/ROOT/robots.txt
@@ -0,0 +1,2 @@
+User-agent: *
+Disallow: /UnicodeJsps
diff --git a/UnicodeJsps/pom.xml b/UnicodeJsps/pom.xml
index 98f0e75b27..83d01106fb 100644
--- a/UnicodeJsps/pom.xml
+++ b/UnicodeJsps/pom.xml
@@ -65,7 +65,7 @@
com.google.guava
guava
- 29.0-jre
+ 32.0.0-jre
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/IdentifierStatus.txt b/UnicodeJsps/src/main/resources/org/unicode/jsp/IdentifierStatus.txt
index a1e3895703..14541ac144 100644
--- a/UnicodeJsps/src/main/resources/org/unicode/jsp/IdentifierStatus.txt
+++ b/UnicodeJsps/src/main/resources/org/unicode/jsp/IdentifierStatus.txt
@@ -1,5 +1,5 @@
# IdentifierStatus.txt
-# Date: 2023-05-16, 22:25:15 GMT
+# Date: 2023-08-11, 17:46:41 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -582,8 +582,8 @@ FA27..FA29 ; Allowed # 1.1 [3] CJK COMPATIBILITY IDEOGRAPH-FA27..CJK CO
2B740..2B81D ; Allowed # 6.0 [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; Allowed # 8.0 [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; Allowed # 10.0 [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
-2EBF0..2EE4A ; Allowed # 15.1 [603] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE4A
+2EBF0..2EE5D ; Allowed # 15.1 [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
30000..3134A ; Allowed # 13.0 [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; Allowed # 15.0 [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
-# Total code points: 112759
+# Total code points: 112778
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/IdentifierType.txt b/UnicodeJsps/src/main/resources/org/unicode/jsp/IdentifierType.txt
index 59602a6648..695156e6ad 100644
--- a/UnicodeJsps/src/main/resources/org/unicode/jsp/IdentifierType.txt
+++ b/UnicodeJsps/src/main/resources/org/unicode/jsp/IdentifierType.txt
@@ -1,5 +1,5 @@
# IdentifierType.txt
-# Date: 2023-05-16, 22:25:14 GMT
+# Date: 2023-08-11, 17:46:40 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -576,11 +576,11 @@ FA27..FA29 ; Recommended # 1.1 [3] CJK COMPATIBILITY ID
2B740..2B81D ; Recommended # 6.0 [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; Recommended # 8.0 [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; Recommended # 10.0 [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
-2EBF0..2EE4A ; Recommended # 15.1 [603] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE4A
+2EBF0..2EE5D ; Recommended # 15.1 [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
30000..3134A ; Recommended # 13.0 [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; Recommended # 15.0 [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
-# Total code points: 112742
+# Total code points: 112761
# Identifier_Type: Inclusion
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/IdnaMappingTable.txt b/UnicodeJsps/src/main/resources/org/unicode/jsp/IdnaMappingTable.txt
index 0ef35b90ea..3fb44638b6 100644
--- a/UnicodeJsps/src/main/resources/org/unicode/jsp/IdnaMappingTable.txt
+++ b/UnicodeJsps/src/main/resources/org/unicode/jsp/IdnaMappingTable.txt
@@ -1,5 +1,5 @@
# IdnaMappingTable.txt
-# Date: 2023-05-15, 22:37:02 GMT
+# Date: 2023-08-10, 22:32:27 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -2036,7 +2036,7 @@
1E9A ; mapped ; 0061 02BE # 1.1 LATIN SMALL LETTER A WITH RIGHT HALF RING
1E9B ; mapped ; 1E61 # 2.0 LATIN SMALL LETTER LONG S WITH DOT ABOVE
1E9C..1E9D ; valid # 5.1 LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE..LATIN SMALL LETTER LONG S WITH HIGH STROKE
-1E9E ; mapped ; 0073 0073 # 5.1 LATIN CAPITAL LETTER SHARP S
+1E9E ; mapped ; 00DF # 5.1 LATIN CAPITAL LETTER SHARP S
1E9F ; valid # 5.1 LATIN SMALL LETTER DELTA
1EA0 ; mapped ; 1EA1 # 1.1 LATIN CAPITAL LETTER A WITH DOT BELOW
1EA1 ; valid # 1.1 LATIN SMALL LETTER A WITH DOT BELOW
@@ -3422,7 +3422,7 @@
31C0..31CF ; valid ; ; NV8 # 4.1 CJK STROKE T..CJK STROKE N
31D0..31E3 ; valid ; ; NV8 # 5.1 CJK STROKE H..CJK STROKE Q
31E4..31EE ; disallowed # NA ..
-31EF ; valid ; ; NV8 # 15.1 IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION
+31EF ; disallowed # 15.1 IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION
31F0..31FF ; valid # 3.2 KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
3200 ; disallowed_STD3_mapped ; 0028 1100 0029 #1.1 PARENTHESIZED HANGUL KIYEOK
3201 ; disallowed_STD3_mapped ; 0028 1102 0029 #1.1 PARENTHESIZED HANGUL NIEUN
@@ -8448,8 +8448,8 @@ FFFE..FFFF ; disallowed # 1.1 ..
2CEB0..2EBE0 ; valid # 10.0 CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2EBE1..2EBEF ; disallowed # NA ..
-2EBF0..2EE4A ; valid # 15.1 CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE4A
-2EE4B..2F7FF ; disallowed # NA ..
+2EBF0..2EE5D ; valid # 15.1 CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
+2EE5E..2F7FF ; disallowed # NA ..
2F800 ; mapped ; 4E3D # 3.1 CJK COMPATIBILITY IDEOGRAPH-2F800
2F801 ; mapped ; 4E38 # 3.1 CJK COMPATIBILITY IDEOGRAPH-2F801
2F802 ; mapped ; 4E41 # 3.1 CJK COMPATIBILITY IDEOGRAPH-2F802
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/NamesList.txt b/UnicodeJsps/src/main/resources/org/unicode/jsp/NamesList.txt
index eb3ed3c8af..4dd0fb39d9 100644
--- a/UnicodeJsps/src/main/resources/org/unicode/jsp/NamesList.txt
+++ b/UnicodeJsps/src/main/resources/org/unicode/jsp/NamesList.txt
@@ -1,10 +1,7 @@
; charset=UTF-8
@@@ The Unicode Standard 15.1.0
-@@@+ U15M230512.lst
- Unicode 15.1.0 names list, seventh delta.
- Repertoire synched with UnicodeData-15.1.0d2.txt.
- Tweak use of notices to suppress year expansions.
- Update annotations for 06F4..06F7.
+@@@+ U15M230728.lst
+ Unicode 15.1.0 final names list.
This file is semi-automatically derived from UnicodeData.txt and
a set of manually created annotations using a script to select
or suppress information from the data file. The rules used
@@ -2429,7 +2426,8 @@
= apostrophe
* glottal stop, glottalization, ejective
* many languages use this as a letter of their alphabets
- * used as a tone marker in Bodo, Dogri, and Maithili
+ * used as a tone marker in Bodo and Dogri
+ * indicates vowel elongation, or various truncations and ellipsis in Maithili
* used as a modifier letter in the Lisu script
* 2019 is the preferred character for a punctuation apostrophe
x (apostrophe - 0027)
@@ -4456,7 +4454,8 @@
* Uyghur, Kazakh
06CC ARABIC LETTER FARSI YEH
* Arabic, Persian, Urdu, Kashmiri, ...
- * initial and medial forms of this letter have dots
+ * initial and medial forms of this letter have two horizontal dots below
+ * retains its dots in initial and medial forms when used in combination with 0654
x (arabic letter alef maksura - 0649)
x (arabic letter yeh - 064A)
06CD ARABIC LETTER YEH WITH TAIL
@@ -19522,8 +19521,10 @@
* indicates pronunciation on one side of the mouth only
x (section sign - 00A7)
@ Dashes
+@+ These long dashes are shown in the code charts inside dashed square boxes because of their width. In production fonts they would simply display as extra-wide dashes.
2E3A TWO-EM DASH
= omission dash
+ * may be used in Chinese for abrupt change of thought, inserting new content, or continuation of tone or sound
x (em dash - 2014)
2E3B THREE-EM DASH
@ Alternate forms of punctuation
@@ -26618,7 +26619,7 @@ D7FB HANGUL JONGSEONG PHIEUPH-THIEUTH
@@+
@+ This block, despite its name, contains a number of unified CJK ideographs. Each is also individually identified by an annotation.
@+ Subheaders identifying sources for subranges do not indicate required usage or preclude mappings to other sources. For example, many pronunciation variants from KS X 1001:1998 are also mapped to a J source.
-@ Pronunciation variants from KS X 1001:1998
+@ Pronunciation variants from KS X 1001:1998
F900 CJK COMPATIBILITY IDEOGRAPH-F900
: 8C48
F901 CJK COMPATIBILITY IDEOGRAPH-F901
@@ -44024,6 +44025,7 @@ FFFF
1BC01 DUPLOYAN LETTER X
* Salishan
@ Line consonants
+@+ Small arrows shown in the chart glyphs for some strokes and arcs indicate the handwriting direction of these characters, and are not visibly rendered.
1BC02 DUPLOYAN LETTER P
* Chinook number 1
1BC03 DUPLOYAN LETTER T
@@ -54200,9 +54202,9 @@ FFFF
@@ 2B740 CJK Unified Ideographs Extension D 2B81D
@@ 2B820 CJK Unified Ideographs Extension E 2CEA1
@@ 2CEB0 CJK Unified Ideographs Extension F 2EBE0
-@@ 2EBF0 CJK Unified Ideographs Extension I 2EE4A
+@@ 2EBF0 CJK Unified Ideographs Extension I 2EE5D
@@ 2F800 CJK Compatibility Ideographs Supplement 2FA1F
-@ Duplicate characters from CNS 11643-1992
+@ Duplicate characters from CNS 11643-1992
2F800 CJK COMPATIBILITY IDEOGRAPH-2F800
: 4E3D
2F801 CJK COMPATIBILITY IDEOGRAPH-2F801
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/confusables.txt b/UnicodeJsps/src/main/resources/org/unicode/jsp/confusables.txt
index 9b39c97b95..cf73eca009 100644
--- a/UnicodeJsps/src/main/resources/org/unicode/jsp/confusables.txt
+++ b/UnicodeJsps/src/main/resources/org/unicode/jsp/confusables.txt
@@ -1,5 +1,5 @@
# confusables.txt
-# Date: 2023-05-16, 22:25:14 GMT
+# Date: 2023-08-11, 17:46:40 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -349,8 +349,8 @@ A4FA ; 002E 002E ; MA # ( ꓺ → .. ) LISU LETTER TONE MYA CYA → FULL STOP, F
A6F4 ; A6F3 A6F3 ; MA #* ( ꛴ → ꛳꛳ ) BAMUM COLON → BAMUM FULL STOP, BAMUM FULL STOP #
-30FB ; 00B7 ; MA #* ( ・ → · ) KATAKANA MIDDLE DOT → MIDDLE DOT # →•→
-FF65 ; 00B7 ; MA #* ( ・ → · ) HALFWIDTH KATAKANA MIDDLE DOT → MIDDLE DOT # →•→
+30FB ; 00B7 ; MA # ( ・ → · ) KATAKANA MIDDLE DOT → MIDDLE DOT # →•→
+FF65 ; 00B7 ; MA # ( ・ → · ) HALFWIDTH KATAKANA MIDDLE DOT → MIDDLE DOT # →•→
16EB ; 00B7 ; MA #* ( ᛫ → · ) RUNIC SINGLE PUNCTUATION → MIDDLE DOT #
0387 ; 00B7 ; MA # ( · → · ) GREEK ANO TELEIA → MIDDLE DOT #
2E31 ; 00B7 ; MA #* ( ⸱ → · ) WORD SEPARATOR MIDDLE DOT → MIDDLE DOT #
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/data/PropertyAliases.txt b/UnicodeJsps/src/main/resources/org/unicode/jsp/data/PropertyAliases.txt
index a017cd2a2c..686b25ab7f 100644
--- a/UnicodeJsps/src/main/resources/org/unicode/jsp/data/PropertyAliases.txt
+++ b/UnicodeJsps/src/main/resources/org/unicode/jsp/data/PropertyAliases.txt
@@ -1,5 +1,5 @@
# PropertyAliases-15.1.0.txt
-# Date: 2023-03-23, 00:36:58 GMT
+# Date: 2023-08-07, 15:21:34 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -122,6 +122,7 @@ ea ; East_Asian_Width
gc ; General_Category
GCB ; Grapheme_Cluster_Break
hst ; Hangul_Syllable_Type
+InCB ; Indic_Conjunct_Break
InPC ; Indic_Positional_Category
InSC ; Indic_Syllabic_Category
jg ; Joining_Group
@@ -211,6 +212,6 @@ XO_NFKC ; Expands_On_NFKC
XO_NFKD ; Expands_On_NFKD
# ================================================
-# Total: 133
+# Total: 134
# EOF
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/data/PropertyValueAliases.txt b/UnicodeJsps/src/main/resources/org/unicode/jsp/data/PropertyValueAliases.txt
index 9d1400e40e..17ffec935d 100644
--- a/UnicodeJsps/src/main/resources/org/unicode/jsp/data/PropertyValueAliases.txt
+++ b/UnicodeJsps/src/main/resources/org/unicode/jsp/data/PropertyValueAliases.txt
@@ -1,5 +1,5 @@
-# PropertyValueAliases-15.1.0.txt
-# Date: 2023-05-10, 16:59:10 GMT
+# PropertyValueAliases-16.0.0.txt
+# Date: 2023-10-17, 12:29:15 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -92,6 +92,7 @@ age; 13.0 ; V13_0
age; 14.0 ; V14_0
age; 15.0 ; V15_0
age; 15.1 ; V15_1
+age; 16.0 ; V16_0
age; NA ; Unassigned
# Alphabetic (Alpha)
@@ -367,6 +368,7 @@ blk; Music ; Musical_Symbols
blk; Myanmar ; Myanmar
blk; Myanmar_Ext_A ; Myanmar_Extended_A
blk; Myanmar_Ext_B ; Myanmar_Extended_B
+blk; Myanmar_Ext_C ; Myanmar_Extended_C
blk; Nabataean ; Nabataean
blk; Nag_Mundari ; Nag_Mundari
blk; Nandinagari ; Nandinagari
@@ -425,6 +427,7 @@ blk; Soyombo ; Soyombo
blk; Specials ; Specials
blk; Sundanese ; Sundanese
blk; Sundanese_Sup ; Sundanese_Supplement
+blk; Sunuwar ; Sunuwar
blk; Sup_Arrows_A ; Supplemental_Arrows_A
blk; Sup_Arrows_B ; Supplemental_Arrows_B
blk; Sup_Arrows_C ; Supplemental_Arrows_C
@@ -460,6 +463,7 @@ blk; Thai ; Thai
blk; Tibetan ; Tibetan
blk; Tifinagh ; Tifinagh
blk; Tirhuta ; Tirhuta
+blk; Todhri ; Todhri
blk; Toto ; Toto
blk; Transport_And_Map ; Transport_And_Map_Symbols
blk; UCAS ; Unified_Canadian_Aboriginal_Syllabics; Canadian_Syllabics
@@ -853,6 +857,13 @@ IDS; Y ; Yes ; T
Ideo; N ; No ; F ; False
Ideo; Y ; Yes ; T ; True
+# Indic_Conjunct_Break (InCB)
+
+InCB; Consonant ; Consonant
+InCB; Extend ; Extend
+InCB; Linker ; Linker
+InCB; None ; None
+
# Indic_Positional_Category (InPC)
InPC; Bottom ; Bottom
@@ -1416,6 +1427,7 @@ sc ; Sogo ; Old_Sogdian
sc ; Sora ; Sora_Sompeng
sc ; Soyo ; Soyombo
sc ; Sund ; Sundanese
+sc ; Sunu ; Sunuwar
sc ; Sylo ; Syloti_Nagri
sc ; Syrc ; Syriac
sc ; Tagb ; Tagbanwa
@@ -1433,6 +1445,7 @@ sc ; Thai ; Thai
sc ; Tibt ; Tibetan
sc ; Tirh ; Tirhuta
sc ; Tnsa ; Tangsa
+sc ; Todr ; Todhri
sc ; Toto ; Toto
sc ; Ugar ; Ugaritic
sc ; Vaii ; Vai
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/emoji-sequences.txt b/UnicodeJsps/src/main/resources/org/unicode/jsp/emoji-sequences.txt
index 47ff90bd22..dfeae158ed 100644
--- a/UnicodeJsps/src/main/resources/org/unicode/jsp/emoji-sequences.txt
+++ b/UnicodeJsps/src/main/resources/org/unicode/jsp/emoji-sequences.txt
@@ -1,5 +1,5 @@
# emoji-sequences.txt
-# Date: 2023-05-05, 23:24:34 GMT
+# Date: 2023-06-05, 21:39:54 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -784,7 +784,7 @@
1F1F9 1F1F2 ; RGI_Emoji_Flag_Sequence ; flag: Turkmenistan # E2.0 [1] (🇹🇲)
1F1F9 1F1F3 ; RGI_Emoji_Flag_Sequence ; flag: Tunisia # E2.0 [1] (🇹🇳)
1F1F9 1F1F4 ; RGI_Emoji_Flag_Sequence ; flag: Tonga # E2.0 [1] (🇹🇴)
-1F1F9 1F1F7 ; RGI_Emoji_Flag_Sequence ; flag: Turkey # E2.0 [1] (🇹🇷)
+1F1F9 1F1F7 ; RGI_Emoji_Flag_Sequence ; flag: Türkiye # E2.0 [1] (🇹🇷)
1F1F9 1F1F9 ; RGI_Emoji_Flag_Sequence ; flag: Trinidad & Tobago # E2.0 [1] (🇹🇹)
1F1F9 1F1FB ; RGI_Emoji_Flag_Sequence ; flag: Tuvalu # E2.0 [1] (🇹🇻)
1F1F9 1F1FC ; RGI_Emoji_Flag_Sequence ; flag: Taiwan # E2.0 [1] (🇹🇼)
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/emoji-zwj-sequences.txt b/UnicodeJsps/src/main/resources/org/unicode/jsp/emoji-zwj-sequences.txt
index b77027aa50..25f8b6154b 100644
--- a/UnicodeJsps/src/main/resources/org/unicode/jsp/emoji-zwj-sequences.txt
+++ b/UnicodeJsps/src/main/resources/org/unicode/jsp/emoji-zwj-sequences.txt
@@ -1,5 +1,5 @@
# emoji-zwj-sequences.txt
-# Date: 2023-05-03, 23:48:43 GMT
+# Date: 2023-06-05, 20:04:50 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -365,6 +365,12 @@
# RGI_Emoji_ZWJ_Sequence: Role
+1F3C3 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right # E15.1 [1] (🏃➡️)
+1F3C3 1F3FB 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right: light skin tone # E15.1 [1] (🏃🏻➡️)
+1F3C3 1F3FC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right: medium-light skin tone # E15.1 [1] (🏃🏼➡️)
+1F3C3 1F3FD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right: medium skin tone # E15.1 [1] (🏃🏽➡️)
+1F3C3 1F3FE 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right: medium-dark skin tone # E15.1 [1] (🏃🏾➡️)
+1F3C3 1F3FF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right: dark skin tone # E15.1 [1] (🏃🏿➡️)
1F468 200D 2695 FE0F ; RGI_Emoji_ZWJ_Sequence ; man health worker # E4.0 [1] (👨⚕️)
1F468 200D 2696 FE0F ; RGI_Emoji_ZWJ_Sequence ; man judge # E4.0 [1] (👨⚖️)
1F468 200D 2708 FE0F ; RGI_Emoji_ZWJ_Sequence ; man pilot # E4.0 [1] (👨✈️)
@@ -641,6 +647,18 @@
1F469 1F3FF 200D 1F9BC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in motorized wheelchair facing right: dark skin tone # E15.1 [1] (👩🏿🦼➡️)
1F469 1F3FF 200D 1F9BD ; RGI_Emoji_ZWJ_Sequence ; woman in manual wheelchair: dark skin tone # E12.0 [1] (👩🏿🦽)
1F469 1F3FF 200D 1F9BD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; woman in manual wheelchair facing right: dark skin tone # E15.1 [1] (👩🏿🦽➡️)
+1F6B6 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right # E15.1 [1] (🚶➡️)
+1F6B6 1F3FB 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right: light skin tone # E15.1 [1] (🚶🏻➡️)
+1F6B6 1F3FC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right: medium-light skin tone # E15.1 [1] (🚶🏼➡️)
+1F6B6 1F3FD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right: medium skin tone # E15.1 [1] (🚶🏽➡️)
+1F6B6 1F3FE 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right: medium-dark skin tone # E15.1 [1] (🚶🏾➡️)
+1F6B6 1F3FF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right: dark skin tone # E15.1 [1] (🚶🏿➡️)
+1F9CE 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right # E15.1 [1] (🧎➡️)
+1F9CE 1F3FB 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right: light skin tone # E15.1 [1] (🧎🏻➡️)
+1F9CE 1F3FC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right: medium-light skin tone # E15.1 [1] (🧎🏼➡️)
+1F9CE 1F3FD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right: medium skin tone # E15.1 [1] (🧎🏽➡️)
+1F9CE 1F3FE 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right: medium-dark skin tone # E15.1 [1] (🧎🏾➡️)
+1F9CE 1F3FF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right: dark skin tone # E15.1 [1] (🧎🏿➡️)
1F9D1 200D 2695 FE0F ; RGI_Emoji_ZWJ_Sequence ; health worker # E12.1 [1] (🧑⚕️)
1F9D1 200D 2696 FE0F ; RGI_Emoji_ZWJ_Sequence ; judge # E12.1 [1] (🧑⚖️)
1F9D1 200D 2708 FE0F ; RGI_Emoji_ZWJ_Sequence ; pilot # E12.1 [1] (🧑✈️)
@@ -786,7 +804,7 @@
1F9D1 1F3FF 200D 1F9BD ; RGI_Emoji_ZWJ_Sequence ; person in manual wheelchair: dark skin tone # E12.1 [1] (🧑🏿🦽)
1F9D1 1F3FF 200D 1F9BD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person in manual wheelchair facing right: dark skin tone # E15.1 [1] (🧑🏿🦽➡️)
-# Total elements: 420
+# Total elements: 438
# ================================================
@@ -1491,12 +1509,6 @@
2764 FE0F 200D 1FA79 ; RGI_Emoji_ZWJ_Sequence ; mending heart # E13.1 [1] (❤️🩹)
1F344 200D 1F7EB ; RGI_Emoji_ZWJ_Sequence ; brown mushroom # E15.1 [1] (🍄🟫)
1F34B 200D 1F7E9 ; RGI_Emoji_ZWJ_Sequence ; lime # E15.1 [1] (🍋🟩)
-1F3C3 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right # E15.1 [1] (🏃➡️)
-1F3C3 1F3FB 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right: light skin tone # E15.1 [1] (🏃🏻➡️)
-1F3C3 1F3FC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right: medium-light skin tone # E15.1 [1] (🏃🏼➡️)
-1F3C3 1F3FD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right: medium skin tone # E15.1 [1] (🏃🏽➡️)
-1F3C3 1F3FE 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right: medium-dark skin tone # E15.1 [1] (🏃🏾➡️)
-1F3C3 1F3FF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person running facing right: dark skin tone # E15.1 [1] (🏃🏿➡️)
1F3F3 FE0F 200D 26A7 FE0F ; RGI_Emoji_ZWJ_Sequence ; transgender flag # E13.0 [1] (🏳️⚧️)
1F3F3 FE0F 200D 1F308 ; RGI_Emoji_ZWJ_Sequence ; rainbow flag # E4.0 [1] (🏳️🌈)
1F3F4 200D 2620 FE0F ; RGI_Emoji_ZWJ_Sequence ; pirate flag # E11.0 [1] (🏴☠️)
@@ -1511,19 +1523,7 @@
1F636 200D 1F32B FE0F ; RGI_Emoji_ZWJ_Sequence ; face in clouds # E13.1 [1] (😶🌫️)
1F642 200D 2194 FE0F ; RGI_Emoji_ZWJ_Sequence ; head shaking horizontally # E15.1 [1] (🙂↔️)
1F642 200D 2195 FE0F ; RGI_Emoji_ZWJ_Sequence ; head shaking vertically # E15.1 [1] (🙂↕️)
-1F6B6 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right # E15.1 [1] (🚶➡️)
-1F6B6 1F3FB 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right: light skin tone # E15.1 [1] (🚶🏻➡️)
-1F6B6 1F3FC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right: medium-light skin tone # E15.1 [1] (🚶🏼➡️)
-1F6B6 1F3FD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right: medium skin tone # E15.1 [1] (🚶🏽➡️)
-1F6B6 1F3FE 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right: medium-dark skin tone # E15.1 [1] (🚶🏾➡️)
-1F6B6 1F3FF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person walking facing right: dark skin tone # E15.1 [1] (🚶🏿➡️)
-1F9CE 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right # E15.1 [1] (🧎➡️)
-1F9CE 1F3FB 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right: light skin tone # E15.1 [1] (🧎🏻➡️)
-1F9CE 1F3FC 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right: medium-light skin tone # E15.1 [1] (🧎🏼➡️)
-1F9CE 1F3FD 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right: medium skin tone # E15.1 [1] (🧎🏽➡️)
-1F9CE 1F3FE 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right: medium-dark skin tone # E15.1 [1] (🧎🏾➡️)
-1F9CE 1F3FF 200D 27A1 FE0F ; RGI_Emoji_ZWJ_Sequence ; person kneeling facing right: dark skin tone # E15.1 [1] (🧎🏿➡️)
-# Total elements: 37
+# Total elements: 19
#EOF
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ASCII_Hex_Digit.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ASCII_Hex_Digit.bin
index 6d9381a387..0fb19403d6 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ASCII_Hex_Digit.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ASCII_Hex_Digit.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Age.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Age.bin
index 3292dc8b6d..f7bf20fb16 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Age.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Age.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Alphabetic.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Alphabetic.bin
index c251eff2f2..8af2cc1037 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Alphabetic.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Alphabetic.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Basic_Emoji.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Basic_Emoji.bin
index d7fcaff713..4d154dd0cd 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Basic_Emoji.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Basic_Emoji.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Class.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Class.bin
index e88f2935c2..37621bfe16 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Class.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Class.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Control.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Control.bin
index dc9c71eb26..6de05489a6 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Control.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Control.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Mirrored.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Mirrored.bin
index e9434182e8..934e70c081 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Mirrored.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Mirrored.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Mirroring_Glyph.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Mirroring_Glyph.bin
index 3855f58011..7ecbe91212 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Mirroring_Glyph.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Mirroring_Glyph.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Paired_Bracket.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Paired_Bracket.bin
index e7b31e1e1d..137ea206cf 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Paired_Bracket.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Paired_Bracket.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Paired_Bracket_Type.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Paired_Bracket_Type.bin
index e6a1df6150..d0089782af 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Paired_Bracket_Type.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Bidi_Paired_Bracket_Type.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Block.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Block.bin
index 1bb2becdc2..88547522d3 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Block.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Block.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/CJK_Radical.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/CJK_Radical.bin
index f83439e80c..5517f1a4a1 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/CJK_Radical.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/CJK_Radical.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Canonical_Combining_Class.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Canonical_Combining_Class.bin
index 59dd361515..eef1a6a23a 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Canonical_Combining_Class.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Canonical_Combining_Class.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Case_Folding.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Case_Folding.bin
index 24c4af826c..75420c338a 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Case_Folding.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Case_Folding.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Case_Ignorable.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Case_Ignorable.bin
index cedc1b19ac..b15729670d 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Case_Ignorable.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Case_Ignorable.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Cased.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Cased.bin
index c55c322615..348a739274 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Cased.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Cased.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Casefolded.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Casefolded.bin
index ba81da82df..d4933d2bc9 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Casefolded.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Casefolded.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Casemapped.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Casemapped.bin
index 0ca22e3898..384480e97b 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Casemapped.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Casemapped.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Lowercased.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Lowercased.bin
index 8ca4a69776..56a0dac252 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Lowercased.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Lowercased.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_NFKC_Casefolded.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_NFKC_Casefolded.bin
index cd5786a942..f04e5f87d7 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_NFKC_Casefolded.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_NFKC_Casefolded.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Titlecased.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Titlecased.bin
index c0bb98e5e9..e74e1a3543 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Titlecased.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Titlecased.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Uppercased.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Uppercased.bin
index c1e7d7f4bc..35aaa01464 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Uppercased.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Changes_When_Uppercased.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Confusable_MA.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Confusable_MA.bin
index e190e2ebbc..dec36868ef 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Confusable_MA.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Confusable_MA.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Dash.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Dash.bin
index ebdf44a581..29ef054dd5 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Dash.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Dash.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Decomposition_Type.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Decomposition_Type.bin
index 76091a7f65..2862713909 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Decomposition_Type.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Decomposition_Type.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Default_Ignorable_Code_Point.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Default_Ignorable_Code_Point.bin
index 1df2d56519..18e77589f2 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Default_Ignorable_Code_Point.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Default_Ignorable_Code_Point.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Deprecated.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Deprecated.bin
index e86c031f28..ae503185d9 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Deprecated.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Deprecated.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Diacritic.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Diacritic.bin
index 9dbaef364b..dab88d216a 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Diacritic.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Diacritic.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/East_Asian_Width.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/East_Asian_Width.bin
index 9cf9fa3c09..8fabd01c84 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/East_Asian_Width.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/East_Asian_Width.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji.bin
index ce4fe107d5..5e5c2ba7fb 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Component.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Component.bin
index eeefc28e0d..729ad90d56 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Component.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Component.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Modifier.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Modifier.bin
index 6d31126f90..f62383515b 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Modifier.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Modifier.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Modifier_Base.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Modifier_Base.bin
index 465a00ac49..7d95f9ed4f 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Modifier_Base.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Modifier_Base.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Presentation.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Presentation.bin
index 7428526e38..2d04064259 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Presentation.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Emoji_Presentation.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Equivalent_Unified_Ideograph.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Equivalent_Unified_Ideograph.bin
index 84506b33fc..d84cdd85e2 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Equivalent_Unified_Ideograph.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Equivalent_Unified_Ideograph.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Extended_Pictographic.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Extended_Pictographic.bin
index 7baaa0cece..caf44d10ef 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Extended_Pictographic.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Extended_Pictographic.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Extender.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Extender.bin
index c27fbbabf3..40ffcc2be9 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Extender.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Extender.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/General_Category.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/General_Category.bin
index 65d881f91d..17e647008d 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/General_Category.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/General_Category.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Grapheme_Cluster_Break.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Grapheme_Cluster_Break.bin
index 9bfa37e401..61c514bbec 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Grapheme_Cluster_Break.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Grapheme_Cluster_Break.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Hangul_Syllable_Type.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Hangul_Syllable_Type.bin
index 201075e999..44df46ac0e 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Hangul_Syllable_Type.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Hangul_Syllable_Type.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Hex_Digit.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Hex_Digit.bin
index 17ad6aac71..b81092202c 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Hex_Digit.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Hex_Digit.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Binary_Operator.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Binary_Operator.bin
index 1c51dc1dc0..ce7b5f11db 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Binary_Operator.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Binary_Operator.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Trinary_Operator.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Trinary_Operator.bin
index 23241a1c5f..834d072a3e 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Trinary_Operator.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Trinary_Operator.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Unary_Operator.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Unary_Operator.bin
index e3e0a3db21..a36d5af43a 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Unary_Operator.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/IDS_Unary_Operator.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Compat_Math_Continue.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Compat_Math_Continue.bin
index 76b8f60ff7..aa6da36792 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Compat_Math_Continue.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Compat_Math_Continue.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Compat_Math_Start.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Compat_Math_Start.bin
index 631ecc65a8..cd83a7281f 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Compat_Math_Start.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Compat_Math_Start.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Continue.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Continue.bin
index 56d5ddf57d..f5cef05d6a 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Continue.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Continue.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Start.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Start.bin
index dbdd4db1de..2f07fb0b0f 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Start.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/ID_Start.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Identifier_Status.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Identifier_Status.bin
index 512a302813..c56ef07c6f 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Identifier_Status.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Identifier_Status.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Identifier_Type.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Identifier_Type.bin
index 4d550dc350..8d0aa06e91 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Identifier_Type.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Identifier_Type.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Ideographic.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Ideographic.bin
index 53781931ad..0cd1cf6952 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Ideographic.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Ideographic.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_2008.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_2008.bin
index d60616f7cd..8e70e6047e 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_2008.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_2008.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_Mapping.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_Mapping.bin
index 5438c6c48a..0ef025ca55 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_Mapping.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_Mapping.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_Status.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_Status.bin
index 2df353de65..aa7f3b76a5 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_Status.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Idn_Status.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Conjunct_Break.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Conjunct_Break.bin
new file mode 100644
index 0000000000..2838521f68
Binary files /dev/null and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Conjunct_Break.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Positional_Category.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Positional_Category.bin
index f8668334ee..49ef70a069 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Positional_Category.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Positional_Category.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Syllabic_Category.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Syllabic_Category.bin
index 8e3087194c..4ce2cb93cb 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Syllabic_Category.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Indic_Syllabic_Category.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Join_Control.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Join_Control.bin
index a8dd5d2d10..040ae01c26 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Join_Control.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Join_Control.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Joining_Group.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Joining_Group.bin
index c7d614b400..dc65d1bc36 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Joining_Group.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Joining_Group.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Joining_Type.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Joining_Type.bin
index fe621ba68c..429d67aea0 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Joining_Type.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Joining_Type.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Line_Break.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Line_Break.bin
index 816aff4ebe..8b5bbe482e 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Line_Break.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Line_Break.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Logical_Order_Exception.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Logical_Order_Exception.bin
index 9c4ef39df3..73536b47b3 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Logical_Order_Exception.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Logical_Order_Exception.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Lowercase.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Lowercase.bin
index 7954f0240f..56eef5bbe0 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Lowercase.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Lowercase.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Lowercase_Mapping.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Lowercase_Mapping.bin
index caec127c97..f1955976bb 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Lowercase_Mapping.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Lowercase_Mapping.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Math.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Math.bin
index 6ccd13ae9d..69ceca4181 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Math.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Math.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFC_Quick_Check.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFC_Quick_Check.bin
index bf2f51d093..6225ed7dd9 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFC_Quick_Check.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFC_Quick_Check.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFD_Quick_Check.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFD_Quick_Check.bin
index ed20d51325..fc203843dc 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFD_Quick_Check.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFD_Quick_Check.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Casefold.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Casefold.bin
index 9c63909441..9174330a0d 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Casefold.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Casefold.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Quick_Check.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Quick_Check.bin
index 82620bbc6a..89b2c843d7 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Quick_Check.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Quick_Check.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Simple_Casefold.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Simple_Casefold.bin
index 837289e739..85741e5f36 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Simple_Casefold.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKC_Simple_Casefold.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKD_Quick_Check.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKD_Quick_Check.bin
index 1b4b1fd764..4cb7d0b091 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKD_Quick_Check.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/NFKD_Quick_Check.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Name.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Name.bin
index fcb708d380..935cbeed08 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Name.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Name.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Name_Alias.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Name_Alias.bin
index 5b8d2bdb4f..73066193ea 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Name_Alias.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Name_Alias.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Named_Sequences.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Named_Sequences.bin
index 5b33a6379b..a251ddc660 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Named_Sequences.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Named_Sequences.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Named_Sequences_Prov.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Named_Sequences_Prov.bin
index ddc2e857e0..6c063aae09 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Named_Sequences_Prov.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Named_Sequences_Prov.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Noncharacter_Code_Point.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Noncharacter_Code_Point.bin
index 8708b67029..4467e672c7 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Noncharacter_Code_Point.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Noncharacter_Code_Point.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Numeric_Type.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Numeric_Type.bin
index c8fc8557d2..bbb6176644 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Numeric_Type.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Numeric_Type.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Numeric_Value.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Numeric_Value.bin
index 1a399339bf..1bd6851596 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Numeric_Value.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Numeric_Value.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Pattern_Syntax.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Pattern_Syntax.bin
index 35752482ba..a9ac908b49 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Pattern_Syntax.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Pattern_Syntax.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Pattern_White_Space.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Pattern_White_Space.bin
index 643b748b1e..d0e3d2ebfe 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Pattern_White_Space.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Pattern_White_Space.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Prepended_Concatenation_Mark.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Prepended_Concatenation_Mark.bin
index 532e7db799..a00dd8596c 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Prepended_Concatenation_Mark.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Prepended_Concatenation_Mark.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Quotation_Mark.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Quotation_Mark.bin
index 2b2cd7f93f..eeb416a94e 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Quotation_Mark.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Quotation_Mark.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Flag_Sequence.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Flag_Sequence.bin
index d9d1c1446d..d3e745a710 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Flag_Sequence.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Flag_Sequence.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Keycap_Sequence.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Keycap_Sequence.bin
index c73a94837b..bf74bcf58e 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Keycap_Sequence.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Keycap_Sequence.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Modifier_Sequence.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Modifier_Sequence.bin
index 30741d343d..32f046a03b 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Modifier_Sequence.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Modifier_Sequence.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Tag_Sequence.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Tag_Sequence.bin
index bf8902d635..3705d0d36f 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Tag_Sequence.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Tag_Sequence.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Zwj_Sequence.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Zwj_Sequence.bin
index 583b46e04c..e3530b6be9 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Zwj_Sequence.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/RGI_Emoji_Zwj_Sequence.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Radical.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Radical.bin
index 6d41530601..8d6ae3276b 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Radical.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Radical.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Regional_Indicator.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Regional_Indicator.bin
index 4a8f4bbad3..d402571701 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Regional_Indicator.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Regional_Indicator.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Script.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Script.bin
index bf8bfb5aa9..6457f9a34f 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Script.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Script.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Script_Extensions.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Script_Extensions.bin
index 963f82fabe..f9944df5c2 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Script_Extensions.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Script_Extensions.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Sentence_Break.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Sentence_Break.bin
index 376d05fdfe..e89e68021d 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Sentence_Break.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Sentence_Break.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Sentence_Terminal.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Sentence_Terminal.bin
index 78f98f66ab..bd8b5eb6cc 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Sentence_Terminal.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Sentence_Terminal.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Case_Folding.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Case_Folding.bin
index 3a8507a9d0..1bbb982c9f 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Case_Folding.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Case_Folding.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Lowercase_Mapping.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Lowercase_Mapping.bin
index 1fd4e9425c..01fa821d2b 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Lowercase_Mapping.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Lowercase_Mapping.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Titlecase_Mapping.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Titlecase_Mapping.bin
index a5e54a321a..0b89554b2b 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Titlecase_Mapping.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Titlecase_Mapping.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Uppercase_Mapping.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Uppercase_Mapping.bin
index 52026e7b9b..c36a7bbe01 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Uppercase_Mapping.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Simple_Uppercase_Mapping.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Soft_Dotted.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Soft_Dotted.bin
index 7ad3216660..a7d8a41e9e 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Soft_Dotted.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Soft_Dotted.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Standardized_Variant.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Standardized_Variant.bin
index 0d277933db..93c0ea22a0 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Standardized_Variant.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Standardized_Variant.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Terminal_Punctuation.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Terminal_Punctuation.bin
index 852dfdb43b..ccf6347f36 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Terminal_Punctuation.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Terminal_Punctuation.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Titlecase_Mapping.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Titlecase_Mapping.bin
index 1a0853a99d..0cfba11454 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Titlecase_Mapping.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Titlecase_Mapping.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Unified_Ideograph.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Unified_Ideograph.bin
index a53e2cdfa2..07a2e12bd2 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Unified_Ideograph.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Unified_Ideograph.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Uppercase.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Uppercase.bin
index 0f5f18b47b..88d7316add 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Uppercase.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Uppercase.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Uppercase_Mapping.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Uppercase_Mapping.bin
index 9638a45260..ae518e5602 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Uppercase_Mapping.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Uppercase_Mapping.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Variation_Selector.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Variation_Selector.bin
index 567c69e19d..9f869dff3c 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Variation_Selector.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Variation_Selector.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Vertical_Orientation.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Vertical_Orientation.bin
index 741ce21e27..885c1590ef 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Vertical_Orientation.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Vertical_Orientation.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/White_Space.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/White_Space.bin
index 08c28bf8cc..c7d8de06df 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/White_Space.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/White_Space.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Word_Break.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Word_Break.bin
index 56fc7d81b0..26d180a6b6 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Word_Break.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/Word_Break.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/XID_Continue.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/XID_Continue.bin
index 95bf005246..6f490e0834 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/XID_Continue.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/XID_Continue.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/XID_Start.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/XID_Start.bin
index c25fceb6d0..c947c5559d 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/XID_Start.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/XID_Start.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kAccountingNumeric.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kAccountingNumeric.bin
index ca69277fd1..b0befdbe85 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kAccountingNumeric.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kAccountingNumeric.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kOtherNumeric.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kOtherNumeric.bin
index 7ee2a9bb68..5a64aba5de 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kOtherNumeric.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kOtherNumeric.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kPrimaryNumeric.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kPrimaryNumeric.bin
index ec00ed8f59..f001b53d41 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kPrimaryNumeric.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kPrimaryNumeric.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kSimplifiedVariant.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kSimplifiedVariant.bin
index e02e771dc5..650a9a6e9f 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kSimplifiedVariant.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kSimplifiedVariant.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kTraditionalVariant.bin b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kTraditionalVariant.bin
index aa0f2d6e56..0b17ca5de0 100644
Binary files a/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kTraditionalVariant.bin and b/UnicodeJsps/src/main/resources/org/unicode/jsp/props/kTraditionalVariant.bin differ
diff --git a/UnicodeJsps/src/main/resources/org/unicode/jsp/subtagNames.txt b/UnicodeJsps/src/main/resources/org/unicode/jsp/subtagNames.txt
index 116f673315..6d0135fad5 100644
--- a/UnicodeJsps/src/main/resources/org/unicode/jsp/subtagNames.txt
+++ b/UnicodeJsps/src/main/resources/org/unicode/jsp/subtagNames.txt
@@ -4773,7 +4773,7 @@ mhz;Mor (Mor Islands)
mi;Māori
mia;Miami
mib;Atatláhuca Mixtec
-mic;Mi'kmaq
+mic;Mi'kmaw
mid;Mandaic
mie;Ocotepec Mixtec
mif;Mofu-Gudur
diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestLanguageid.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestLanguageid.java
index 7bd449fa75..aa92fdb083 100644
--- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestLanguageid.java
+++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestLanguageid.java
@@ -1,21 +1,29 @@
package org.unicode.jsptest;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
import com.ibm.icu.util.ULocale;
import org.junit.jupiter.api.Test;
import org.unicode.jsp.LanguageCode;
-import org.unicode.unittest.TestFmwkMinusMinus;
-public class TestLanguageid extends TestFmwkMinusMinus {
+public class TestLanguageid {
@Test
public void TestParse() {
- String results;
- results = LanguageCode.validate("pap-CW", new ULocale("en"));
- if (!assertTrue("", results.contains("Curaçao"))) {
- errln(results);
+ {
+ final String results = LanguageCode.validate("pap-CW", new ULocale("en"));
+ final String expected = "Curaçao";
+ assertContains(results, expected);
+ }
+
+ {
+ final String results = LanguageCode.validate("$, eng-840, fr-fr", new ULocale("en"));
+ final String expected = "target='languageid'>fr-FR";
+ assertContains(results, expected);
}
+ }
- results = LanguageCode.validate("$, eng-840, fr-fr", new ULocale("en"));
- assertContains(results, "target='languageid'>fr-FR");
+ private void assertContains(final String results, final String expected) {
+ assertTrue(results.contains(expected), () -> results + " did not contain " + expected);
}
}
diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
index 3398bbb8fb..e05911654d 100644
--- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
+++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
@@ -34,6 +34,7 @@
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
+import org.opentest4j.TestAbortedException;
import org.unicode.jsp.CharEncoder;
import org.unicode.jsp.Common;
import org.unicode.jsp.UnicodeJsp;
@@ -380,6 +381,9 @@ public void TestPerMill(final String name, final Charset charset) {
CharEncoder encoder;
try {
encoder = new CharEncoder(charset, false, false);
+ } catch (UnsupportedOperationException e) {
+ // skip charsets that aren't supported
+ throw new TestAbortedException("Skipping charset " + charset.name(), e);
} catch (Exception e) {
e.printStackTrace();
assumeTrue(e == null, "Caught exception " + e);
diff --git a/docs/unicodejsps/gcp-run.md b/docs/unicodejsps/gcp-run.md
index c0ee56f518..90eaa8bdc5 100644
--- a/docs/unicodejsps/gcp-run.md
+++ b/docs/unicodejsps/gcp-run.md
@@ -24,7 +24,7 @@ mkdir -p UnicodeJsps/target && tar -cpz --exclude=.git -f UnicodeJsps/target/cld
- build it
```
-docker build -t unicode/unicode-jsps .
+docker build -t unicode/unicode-jsps UnicodeJsps/
```
- try it
@@ -45,14 +45,14 @@ docker run --rm -p 8080:8080 unicode/unicode-jsps
- login to docker
```
-gcloud auth configure-docker \
- us-central1-docker.pkg.dev
+gcloud auth configure-docker us-central1-docker.pkg.dev
```
-- build docker image
+- build docker image and run it
```
-docker build -t us-central1-docker.pkg.dev/goog-unicode-dev/unicode-jsps/unicode-jsps:latest .
+docker build -t us-central1-docker.pkg.dev/goog-unicode-dev/unicode-jsps/unicode-jsps:latest UnicodeJsps/
+docker run --rm -p 8080:8080 us-central1-docker.pkg.dev/goog-unicode-dev/unicode-jsps/unicode-jsps:latest
```
- push docker image
diff --git a/docs/unicodejsps/index.md b/docs/unicodejsps/index.md
index 477f20d08c..c3d97f27cd 100644
--- a/docs/unicodejsps/index.md
+++ b/docs/unicodejsps/index.md
@@ -1,11 +1,5 @@
# Building UnicodeJsp
-- Note: you can run the latest UnicodeJsp locally with docker using:
-
-```
-docker run --rm -p 8080:8080 unicode/unicode-jsp
-```
-
- Note 2: there are some notes on updated processes for using GCP at [gcp-run.md](./gcp-run.md) - at present, automated deployment is TODO.
## Compiling
@@ -113,7 +107,26 @@ Look at , and make sure that
there aren't any Z-Other props at the bottom (you'll need to update via Adding
New Properties if there are).
-(:construction: **TODO**: explain how to do a Docker-based build here.)
+### Running a Docker-based build
+
+Compile the Java code:
+
+- `mvn -B package -am -pl UnicodeJsps -DskipTests=true`
+
+Make a “backup” copy of CLDR and UnicodeTools. (`~/src/cldr` is an optional existing CLDR checkout; if it is present, `git clone` reuses its objects to reduce the download.)
+
+- `git clone --reference-if-able ~/src/cldr https://github.com/unicode-org/cldr.git || (cd cldr && git pull)`
+- `mkdir -p UnicodeJsps/target && tar -cpz --exclude=.git --exclude=unicodetools/target/ -f UnicodeJsps/target/cldr-unicodetools.tgz ./cldr/ ./unicodetools/`
+
+Now, finally build.
+
+- `docker build -t unicode/unicode-jsp:latest UnicodeJsps/`
+
+… And run. Press Control-C to stop it; otherwise visit <http://localhost:8080/> in a browser.
+
+```
+docker run --rm -p 8080:8080 unicode/unicode-jsp:latest
+```
## Commit/PR
diff --git a/pom.xml b/pom.xml
index 8bca1d7f57..3aedfb69c4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -31,7 +31,7 @@
- 0.0.0-SNAPSHOT-66d15bfc1b
+ 0.0.0-SNAPSHOT-ba1c4f0cb1
@@ -120,7 +120,7 @@
maven-failsafe-plugin
${maven-surefire-plugin-version}
- {devucddir / p.relative_to(genucddir)}" for p in to_move])) # noqa: E501
- confirm = bool(sys.argv[-1] == "-y") # enable running this in automation
+ confirm = bool("-y" in sys.argv[1:]) # enable running this in automation
if not confirm:
confirm = input("\nProceed [y/N]?").lower() == "y"
diff --git a/unicodetools/data/ucd/dev/ArabicShaping.txt b/unicodetools/data/ucd/dev/ArabicShaping.txt
index dd8cb333e0..0def17a032 100644
--- a/unicodetools/data/ucd/dev/ArabicShaping.txt
+++ b/unicodetools/data/ucd/dev/ArabicShaping.txt
@@ -828,6 +828,11 @@ A873; PHAGS-PA CANDRABINDU; U; No_Joining_Group
10D22; HANIFI ROHINGYA SAKIN; R; No_Joining_Group
10D23; HANIFI ROHINGYA DOTLESS KINNA YA WITH DOT ABOVE; D; HANIFI ROHINGYA KINNA YA
+# Arabic Extended-D Characters
+10EC2; DAL WITH VERTICAL 2 DOTS BELOW; R; DAL
+10EC3; TAH WITH VERTICAL 2 DOTS BELOW; D; TAH
+10EC4; KAF WITH VERTICAL 2 DOTS BELOW; D; KAF
+
# Sogdian Characters
10F30; SOGDIAN ALEPH; D; No_Joining_Group
diff --git a/unicodetools/data/ucd/dev/Blocks.txt b/unicodetools/data/ucd/dev/Blocks.txt
index 0e928cd718..a8539ea1b0 100644
--- a/unicodetools/data/ucd/dev/Blocks.txt
+++ b/unicodetools/data/ucd/dev/Blocks.txt
@@ -217,6 +217,7 @@ FFF0..FFFF; Specials
10500..1052F; Elbasan
10530..1056F; Caucasian Albanian
10570..105BF; Vithkuqi
+105C0..105FF; Todhri
10600..1077F; Linear A
10780..107BF; Latin Extended-F
10800..1083F; Cypriot Syllabary
@@ -264,6 +265,7 @@ FFF0..FFFF; Specials
11600..1165F; Modi
11660..1167F; Mongolian Supplement
11680..116CF; Takri
+116D0..116FF; Myanmar Extended-C
11700..1174F; Ahom
11800..1184F; Dogra
118A0..118FF; Warang Citi
@@ -274,6 +276,7 @@ FFF0..FFFF; Specials
11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A
11AC0..11AFF; Pau Cin Hau
11B00..11B5F; Devanagari Extended-A
+11BC0..11BFF; Sunuwar
11C00..11C6F; Bhaiksuki
11C70..11CBF; Marchen
11D00..11D5F; Masaram Gondi
diff --git a/unicodetools/data/ucd/dev/CaseFolding.txt b/unicodetools/data/ucd/dev/CaseFolding.txt
index 69c5c64b4c..ba43df3ece 100644
--- a/unicodetools/data/ucd/dev/CaseFolding.txt
+++ b/unicodetools/data/ucd/dev/CaseFolding.txt
@@ -1,5 +1,5 @@
-# CaseFolding-15.1.0.txt
-# Date: 2023-05-12, 21:53:10 GMT
+# CaseFolding-16.0.0.txt
+# Date: 2023-10-03, 19:01:21 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -603,6 +603,7 @@
1C86; C; 044A; # CYRILLIC SMALL LETTER TALL HARD SIGN
1C87; C; 0463; # CYRILLIC SMALL LETTER TALL YAT
1C88; C; A64B; # CYRILLIC SMALL LETTER UNBLENDED UK
+1C89; C; 1C8A; # CYRILLIC CAPITAL LETTER TJE
1C90; C; 10D0; # GEORGIAN MTAVRULI CAPITAL LETTER AN
1C91; C; 10D1; # GEORGIAN MTAVRULI CAPITAL LETTER BAN
1C92; C; 10D2; # GEORGIAN MTAVRULI CAPITAL LETTER GAN
diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt
index 59eda32a9e..386d713b9f 100644
--- a/unicodetools/data/ucd/dev/DerivedAge.txt
+++ b/unicodetools/data/ucd/dev/DerivedAge.txt
@@ -1,5 +1,5 @@
# DerivedAge-16.0.0.txt
-# Date: 2023-10-02, 13:27:10 GMT
+# Date: 2023-10-17, 12:28:26 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -2009,9 +2009,17 @@ FDFE..FDFF ; 14.0 # [2] ARABIC LIGATURE SUBHAANAHU WA TAAALAA..ARABIC LIGAT
# Newly assigned in Unicode 16.0.0 (September, 2024)
-1CEB0..1CEB3 ; 16.0 # [4] HORIZONTAL ZIGZAG LINE..BLACK RIGHT TRIANGLE CARET
-1F8B2 ; 16.0 # RIGHTWARDS ARROW WITH LOWER HOOK
-
-# Total code points: 5
+0897 ; 16.0 # ARABIC PEPET
+0C5C ; 16.0 # TELUGU ARCHAIC SHRII
+0CDC ; 16.0 # KANNADA ARCHAIC SHRII
+1C89..1C8A ; 16.0 # [2] CYRILLIC CAPITAL LETTER TJE..CYRILLIC SMALL LETTER TJE
+105C0..105F3 ; 16.0 # [52] TODHRI LETTER A..TODHRI LETTER OO
+10EC2..10EC4 ; 16.0 # [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
+10EFC ; 16.0 # ARABIC COMBINING ALEF OVERLAY
+116D0..116E3 ; 16.0 # [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE
+11BC0..11BE1 ; 16.0 # [34] SUNUWAR LETTER DEVI..SUNUWAR SIGN PVO
+11BF0..11BF9 ; 16.0 # [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE
+
+# Total code points: 125
# EOF
diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt
index f1f131011c..c00c3c9763 100644
--- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt
+++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt
@@ -1,5 +1,5 @@
# DerivedCoreProperties-16.0.0.txt
-# Date: 2023-10-02, 13:27:33 GMT
+# Date: 2023-10-17, 12:28:59 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -343,6 +343,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A
0860..086A ; Alphabetic # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
0870..0887 ; Alphabetic # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT
0889..088E ; Alphabetic # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
+0897 ; Alphabetic # Mn ARABIC PEPET
08A0..08C8 ; Alphabetic # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
08C9 ; Alphabetic # Lm ARABIC SMALL FARSI YEH
08D4..08DF ; Alphabetic # Mn [12] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH WORD WAQFA
@@ -474,7 +475,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A
0C4A..0C4C ; Alphabetic # Mn [3] TELUGU VOWEL SIGN O..TELUGU VOWEL SIGN AU
0C55..0C56 ; Alphabetic # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
0C58..0C5A ; Alphabetic # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
-0C5D ; Alphabetic # Lo TELUGU LETTER NAKAARA POLLU
+0C5C..0C5D ; Alphabetic # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU
0C60..0C61 ; Alphabetic # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
0C62..0C63 ; Alphabetic # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
0C80 ; Alphabetic # Lo KANNADA SIGN SPACING CANDRABINDU
@@ -494,7 +495,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A
0CCA..0CCB ; Alphabetic # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
0CCC ; Alphabetic # Mn KANNADA VOWEL SIGN AU
0CD5..0CD6 ; Alphabetic # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
-0CDD..0CDE ; Alphabetic # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA
+0CDC..0CDE ; Alphabetic # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA
0CE0..0CE1 ; Alphabetic # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0CE2..0CE3 ; Alphabetic # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
0CF1..0CF2 ; Alphabetic # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
@@ -710,7 +711,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A
1C4D..1C4F ; Alphabetic # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA
1C5A..1C77 ; Alphabetic # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
1C78..1C7D ; Alphabetic # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
-1C80..1C88 ; Alphabetic # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C8A ; Alphabetic # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE
1C90..1CBA ; Alphabetic # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
1CBD..1CBF ; Alphabetic # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
1CE9..1CEC ; Alphabetic # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
@@ -998,6 +999,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG
105A3..105B1 ; Alphabetic # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
105B3..105B9 ; Alphabetic # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
105BB..105BC ; Alphabetic # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
+105C0..105F3 ; Alphabetic # Lo [52] TODHRI LETTER A..TODHRI LETTER OO
10600..10736 ; Alphabetic # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
10740..10755 ; Alphabetic # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
10760..10767 ; Alphabetic # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807
@@ -1041,6 +1043,8 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG
10E80..10EA9 ; Alphabetic # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
10EAB..10EAC ; Alphabetic # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
10EB0..10EB1 ; Alphabetic # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
+10EC2..10EC4 ; Alphabetic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
+10EFC ; Alphabetic # Mn ARABIC COMBINING ALEF OVERLAY
10F00..10F1C ; Alphabetic # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
10F27 ; Alphabetic # Lo OLD SOGDIAN LIGATURE AYIN-DALETH
10F30..10F45 ; Alphabetic # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
@@ -1211,6 +1215,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG
11A97 ; Alphabetic # Mc SOYOMBO SIGN VISARGA
11A9D ; Alphabetic # Lo SOYOMBO MARK PLUTA
11AB0..11AF8 ; Alphabetic # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL
+11BC0..11BE0 ; Alphabetic # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
11C00..11C08 ; Alphabetic # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
11C0A..11C2E ; Alphabetic # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
11C2F ; Alphabetic # Mc BHAIKSUKI VOWEL SIGN AA
@@ -1402,7 +1407,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG
30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
-# Total code points: 138387
+# Total code points: 138481
# ================================================
@@ -1691,6 +1696,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG
10FD..10FF ; Lowercase # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN
13F8..13FD ; Lowercase # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV
1C80..1C88 ; Lowercase # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C8A ; Lowercase # L& CYRILLIC SMALL LETTER TJE
1D00..1D2B ; Lowercase # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL
1D2C..1D6A ; Lowercase # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI
1D6B..1D77 ; Lowercase # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G
@@ -2096,7 +2102,7 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
1E030..1E06D ; Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE
1E922..1E943 ; Lowercase # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA
-# Total code points: 2544
+# Total code points: 2545
# ================================================
@@ -2379,6 +2385,7 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
10C7 ; Uppercase # L& GEORGIAN CAPITAL LETTER YN
10CD ; Uppercase # L& GEORGIAN CAPITAL LETTER AEN
13A0..13F5 ; Uppercase # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV
+1C89 ; Uppercase # L& CYRILLIC CAPITAL LETTER TJE
1C90..1CBA ; Uppercase # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
1CBD..1CBF ; Uppercase # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
1E00 ; Uppercase # L& LATIN CAPITAL LETTER A WITH RING BELOW
@@ -2755,7 +2762,7 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH
1F150..1F169 ; Uppercase # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
1F170..1F189 ; Uppercase # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z
-# Total code points: 1951
+# Total code points: 1952
# ================================================
@@ -2800,7 +2807,7 @@ FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH
10FD..10FF ; Cased # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN
13A0..13F5 ; Cased # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV
13F8..13FD ; Cased # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV
-1C80..1C88 ; Cased # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C8A ; Cased # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE
1C90..1CBA ; Cased # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
1CBD..1CBF ; Cased # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
1D00..1D2B ; Cased # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL
@@ -2938,7 +2945,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN
1F150..1F169 ; Cased # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
1F170..1F189 ; Cased # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z
-# Total code points: 4526
+# Total code points: 4528
# ================================================
@@ -3015,7 +3022,7 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN
0859..085B ; Case_Ignorable # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
0888 ; Case_Ignorable # Sk ARABIC RAISED ROUND DOT
0890..0891 ; Case_Ignorable # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
-0898..089F ; Case_Ignorable # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+0897..089F ; Case_Ignorable # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
08C9 ; Case_Ignorable # Lm ARABIC SMALL FARSI YEH
08CA..08E1 ; Case_Ignorable # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
08E2 ; Case_Ignorable # Cf ARABIC DISPUTED END OF AYAH
@@ -3297,7 +3304,7 @@ FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLI
10AE5..10AE6 ; Case_Ignorable # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
10D24..10D27 ; Case_Ignorable # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
10EAB..10EAC ; Case_Ignorable # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
-10EFD..10EFF ; Case_Ignorable # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
+10EFC..10EFF ; Case_Ignorable # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA
10F46..10F50 ; Case_Ignorable # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
10F82..10F85 ; Case_Ignorable # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
11001 ; Case_Ignorable # Mn BRAHMI SIGN ANUSVARA
@@ -3440,7 +3447,7 @@ E0001 ; Case_Ignorable # Cf LANGUAGE TAG
E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG
E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
-# Total code points: 2707
+# Total code points: 2709
# ================================================
@@ -3724,6 +3731,7 @@ E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELEC
10C7 ; Changes_When_Lowercased # L& GEORGIAN CAPITAL LETTER YN
10CD ; Changes_When_Lowercased # L& GEORGIAN CAPITAL LETTER AEN
13A0..13F5 ; Changes_When_Lowercased # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV
+1C89 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER TJE
1C90..1CBA ; Changes_When_Lowercased # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
1CBD..1CBF ; Changes_When_Lowercased # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
1E00 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH RING BELOW
@@ -4059,7 +4067,7 @@ FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTE
16E40..16E5F ; Changes_When_Lowercased # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y
1E900..1E921 ; Changes_When_Lowercased # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA
-# Total code points: 1433
+# Total code points: 1434
# ================================================
@@ -4357,6 +4365,7 @@ FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTE
10FD..10FF ; Changes_When_Uppercased # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN
13F8..13FD ; Changes_When_Uppercased # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV
1C80..1C88 ; Changes_When_Uppercased # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C8A ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER TJE
1D79 ; Changes_When_Uppercased # L& LATIN SMALL LETTER INSULAR G
1D7D ; Changes_When_Uppercased # L& LATIN SMALL LETTER P WITH STROKE
1D8E ; Changes_When_Uppercased # L& LATIN SMALL LETTER Z WITH PALATAL HOOK
@@ -4696,7 +4705,7 @@ FF41..FF5A ; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER
16E60..16E7F ; Changes_When_Uppercased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y
1E922..1E943 ; Changes_When_Uppercased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA
-# Total code points: 1525
+# Total code points: 1526
# ================================================
@@ -4993,6 +5002,7 @@ FF41..FF5A ; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER
0561..0587 ; Changes_When_Titlecased # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN
13F8..13FD ; Changes_When_Titlecased # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV
1C80..1C88 ; Changes_When_Titlecased # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C8A ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER TJE
1D79 ; Changes_When_Titlecased # L& LATIN SMALL LETTER INSULAR G
1D7D ; Changes_When_Titlecased # L& LATIN SMALL LETTER P WITH STROKE
1D8E ; Changes_When_Titlecased # L& LATIN SMALL LETTER Z WITH PALATAL HOOK
@@ -5332,7 +5342,7 @@ FF41..FF5A ; Changes_When_Titlecased # L& [26] FULLWIDTH LATIN SMALL LETTER
16E60..16E7F ; Changes_When_Titlecased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y
1E922..1E943 ; Changes_When_Titlecased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA
-# Total code points: 1452
+# Total code points: 1453
# ================================================
@@ -5623,7 +5633,7 @@ FF41..FF5A ; Changes_When_Titlecased # L& [26] FULLWIDTH LATIN SMALL LETTER
10C7 ; Changes_When_Casefolded # L& GEORGIAN CAPITAL LETTER YN
10CD ; Changes_When_Casefolded # L& GEORGIAN CAPITAL LETTER AEN
13F8..13FD ; Changes_When_Casefolded # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV
-1C80..1C88 ; Changes_When_Casefolded # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C89 ; Changes_When_Casefolded # L& [10] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC CAPITAL LETTER TJE
1C90..1CBA ; Changes_When_Casefolded # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
1CBD..1CBF ; Changes_When_Casefolded # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
1E00 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH RING BELOW
@@ -5964,7 +5974,7 @@ FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTE
16E40..16E5F ; Changes_When_Casefolded # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y
1E900..1E921 ; Changes_When_Casefolded # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA
-# Total code points: 1506
+# Total code points: 1507
# ================================================
@@ -6027,7 +6037,7 @@ FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTE
10FD..10FF ; Changes_When_Casemapped # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN
13A0..13F5 ; Changes_When_Casemapped # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV
13F8..13FD ; Changes_When_Casemapped # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV
-1C80..1C88 ; Changes_When_Casemapped # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C8A ; Changes_When_Casemapped # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE
1C90..1CBA ; Changes_When_Casemapped # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
1CBD..1CBF ; Changes_When_Casemapped # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
1D79 ; Changes_When_Casemapped # L& LATIN SMALL LETTER INSULAR G
@@ -6105,7 +6115,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER
16E40..16E7F ; Changes_When_Casemapped # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y
1E900..1E943 ; Changes_When_Casemapped # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA
-# Total code points: 2927
+# Total code points: 2929
# ================================================
@@ -6247,7 +6257,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER
0C2A..0C39 ; ID_Start # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA
0C3D ; ID_Start # Lo TELUGU SIGN AVAGRAHA
0C58..0C5A ; ID_Start # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
-0C5D ; ID_Start # Lo TELUGU LETTER NAKAARA POLLU
+0C5C..0C5D ; ID_Start # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU
0C60..0C61 ; ID_Start # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
0C80 ; ID_Start # Lo KANNADA SIGN SPACING CANDRABINDU
0C85..0C8C ; ID_Start # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
@@ -6256,7 +6266,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER
0CAA..0CB3 ; ID_Start # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA
0CB5..0CB9 ; ID_Start # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA
0CBD ; ID_Start # Lo KANNADA SIGN AVAGRAHA
-0CDD..0CDE ; ID_Start # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA
+0CDC..0CDE ; ID_Start # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA
0CE0..0CE1 ; ID_Start # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0CF1..0CF2 ; ID_Start # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
0D04..0D0C ; ID_Start # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
@@ -6364,7 +6374,7 @@ FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER
1C4D..1C4F ; ID_Start # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA
1C5A..1C77 ; ID_Start # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
1C78..1C7D ; ID_Start # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
-1C80..1C88 ; ID_Start # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C8A ; ID_Start # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE
1C90..1CBA ; ID_Start # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
1CBD..1CBF ; ID_Start # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
1CE9..1CEC ; ID_Start # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
@@ -6603,6 +6613,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
105A3..105B1 ; ID_Start # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
105B3..105B9 ; ID_Start # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
105BB..105BC ; ID_Start # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
+105C0..105F3 ; ID_Start # Lo [52] TODHRI LETTER A..TODHRI LETTER OO
10600..10736 ; ID_Start # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
10740..10755 ; ID_Start # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
10760..10767 ; ID_Start # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807
@@ -6641,6 +6652,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
10D00..10D23 ; ID_Start # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
10E80..10EA9 ; ID_Start # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
10EB0..10EB1 ; ID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
+10EC2..10EC4 ; ID_Start # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
10F00..10F1C ; ID_Start # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
10F27 ; ID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH
10F30..10F45 ; ID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
@@ -6713,6 +6725,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
11A5C..11A89 ; ID_Start # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA
11A9D ; ID_Start # Lo SOYOMBO MARK PLUTA
11AB0..11AF8 ; ID_Start # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL
+11BC0..11BE0 ; ID_Start # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
11C00..11C08 ; ID_Start # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
11C0A..11C2E ; ID_Start # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
11C40 ; ID_Start # Lo BHAIKSUKI SIGN AVAGRAHA
@@ -6859,7 +6872,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
-# Total code points: 136967
+# Total code points: 137059
# ================================================
@@ -6966,7 +6979,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
0860..086A ; ID_Continue # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
0870..0887 ; ID_Continue # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT
0889..088E ; ID_Continue # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
-0898..089F ; ID_Continue # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+0897..089F ; ID_Continue # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
08A0..08C8 ; ID_Continue # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
08C9 ; ID_Continue # Lm ARABIC SMALL FARSI YEH
08CA..08E1 ; ID_Continue # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
@@ -7115,7 +7128,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
0C4A..0C4D ; ID_Continue # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
0C55..0C56 ; ID_Continue # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
0C58..0C5A ; ID_Continue # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
-0C5D ; ID_Continue # Lo TELUGU LETTER NAKAARA POLLU
+0C5C..0C5D ; ID_Continue # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU
0C60..0C61 ; ID_Continue # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
0C62..0C63 ; ID_Continue # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
0C66..0C6F ; ID_Continue # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
@@ -7137,7 +7150,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
0CCA..0CCB ; ID_Continue # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
0CCC..0CCD ; ID_Continue # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
0CD5..0CD6 ; ID_Continue # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
-0CDD..0CDE ; ID_Continue # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA
+0CDC..0CDE ; ID_Continue # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA
0CE0..0CE1 ; ID_Continue # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0CE2..0CE3 ; ID_Continue # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
0CE6..0CEF ; ID_Continue # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
@@ -7399,7 +7412,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
1C50..1C59 ; ID_Continue # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE
1C5A..1C77 ; ID_Continue # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
1C78..1C7D ; ID_Continue # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
-1C80..1C88 ; ID_Continue # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C8A ; ID_Continue # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE
1C90..1CBA ; ID_Continue # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
1CBD..1CBF ; ID_Continue # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
1CD0..1CD2 ; ID_Continue # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
@@ -7735,6 +7748,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
105A3..105B1 ; ID_Continue # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
105B3..105B9 ; ID_Continue # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
105BB..105BC ; ID_Continue # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
+105C0..105F3 ; ID_Continue # Lo [52] TODHRI LETTER A..TODHRI LETTER OO
10600..10736 ; ID_Continue # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
10740..10755 ; ID_Continue # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
10760..10767 ; ID_Continue # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807
@@ -7782,7 +7796,8 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
10E80..10EA9 ; ID_Continue # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
10EAB..10EAC ; ID_Continue # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
10EB0..10EB1 ; ID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
-10EFD..10EFF ; ID_Continue # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
+10EC2..10EC4 ; ID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
+10EFC..10EFF ; ID_Continue # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA
10F00..10F1C ; ID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
10F27 ; ID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH
10F30..10F45 ; ID_Continue # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
@@ -7929,6 +7944,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
116B7 ; ID_Continue # Mn TAKRI SIGN NUKTA
116B8 ; ID_Continue # Lo TAKRI LETTER ARCHAIC KHA
116C0..116C9 ; ID_Continue # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
+116D0..116E3 ; ID_Continue # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE
11700..1171A ; ID_Continue # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA
1171D..1171F ; ID_Continue # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA
11720..11721 ; ID_Continue # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA
@@ -7988,6 +8004,8 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
11A98..11A99 ; ID_Continue # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER
11A9D ; ID_Continue # Lo SOYOMBO MARK PLUTA
11AB0..11AF8 ; ID_Continue # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL
+11BC0..11BE0 ; ID_Continue # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
+11BF0..11BF9 ; ID_Continue # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE
11C00..11C08 ; ID_Continue # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
11C0A..11C2E ; ID_Continue # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
11C2F ; ID_Continue # Mc BHAIKSUKI VOWEL SIGN AA
@@ -8218,7 +8236,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN
31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
-# Total code points: 140108
+# Total code points: 140232
# ================================================
@@ -8357,7 +8375,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR
0C2A..0C39 ; XID_Start # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA
0C3D ; XID_Start # Lo TELUGU SIGN AVAGRAHA
0C58..0C5A ; XID_Start # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
-0C5D ; XID_Start # Lo TELUGU LETTER NAKAARA POLLU
+0C5C..0C5D ; XID_Start # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU
0C60..0C61 ; XID_Start # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
0C80 ; XID_Start # Lo KANNADA SIGN SPACING CANDRABINDU
0C85..0C8C ; XID_Start # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
@@ -8366,7 +8384,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR
0CAA..0CB3 ; XID_Start # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA
0CB5..0CB9 ; XID_Start # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA
0CBD ; XID_Start # Lo KANNADA SIGN AVAGRAHA
-0CDD..0CDE ; XID_Start # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA
+0CDC..0CDE ; XID_Start # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA
0CE0..0CE1 ; XID_Start # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0CF1..0CF2 ; XID_Start # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
0D04..0D0C ; XID_Start # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
@@ -8474,7 +8492,7 @@ E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR
1C4D..1C4F ; XID_Start # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA
1C5A..1C77 ; XID_Start # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
1C78..1C7D ; XID_Start # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
-1C80..1C88 ; XID_Start # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C8A ; XID_Start # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE
1C90..1CBA ; XID_Start # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
1CBD..1CBF ; XID_Start # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
1CE9..1CEC ; XID_Start # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
@@ -8717,6 +8735,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
105A3..105B1 ; XID_Start # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
105B3..105B9 ; XID_Start # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
105BB..105BC ; XID_Start # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
+105C0..105F3 ; XID_Start # Lo [52] TODHRI LETTER A..TODHRI LETTER OO
10600..10736 ; XID_Start # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
10740..10755 ; XID_Start # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
10760..10767 ; XID_Start # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807
@@ -8755,6 +8774,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
10D00..10D23 ; XID_Start # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
10E80..10EA9 ; XID_Start # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
10EB0..10EB1 ; XID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
+10EC2..10EC4 ; XID_Start # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
10F00..10F1C ; XID_Start # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
10F27 ; XID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH
10F30..10F45 ; XID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
@@ -8827,6 +8847,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
11A5C..11A89 ; XID_Start # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA
11A9D ; XID_Start # Lo SOYOMBO MARK PLUTA
11AB0..11AF8 ; XID_Start # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL
+11BC0..11BE0 ; XID_Start # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
11C00..11C08 ; XID_Start # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
11C0A..11C2E ; XID_Start # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
11C40 ; XID_Start # Lo BHAIKSUKI SIGN AVAGRAHA
@@ -8973,7 +8994,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
-# Total code points: 136944
+# Total code points: 137036
# ================================================
@@ -9076,7 +9097,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
0860..086A ; XID_Continue # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA
0870..0887 ; XID_Continue # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT
0889..088E ; XID_Continue # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
-0898..089F ; XID_Continue # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+0897..089F ; XID_Continue # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
08A0..08C8 ; XID_Continue # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
08C9 ; XID_Continue # Lm ARABIC SMALL FARSI YEH
08CA..08E1 ; XID_Continue # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
@@ -9225,7 +9246,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
0C4A..0C4D ; XID_Continue # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
0C55..0C56 ; XID_Continue # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
0C58..0C5A ; XID_Continue # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
-0C5D ; XID_Continue # Lo TELUGU LETTER NAKAARA POLLU
+0C5C..0C5D ; XID_Continue # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU
0C60..0C61 ; XID_Continue # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
0C62..0C63 ; XID_Continue # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
0C66..0C6F ; XID_Continue # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
@@ -9247,7 +9268,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
0CCA..0CCB ; XID_Continue # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
0CCC..0CCD ; XID_Continue # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
0CD5..0CD6 ; XID_Continue # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
-0CDD..0CDE ; XID_Continue # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA
+0CDC..0CDE ; XID_Continue # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA
0CE0..0CE1 ; XID_Continue # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0CE2..0CE3 ; XID_Continue # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
0CE6..0CEF ; XID_Continue # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
@@ -9509,7 +9530,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU
1C50..1C59 ; XID_Continue # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE
1C5A..1C77 ; XID_Continue # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
1C78..1C7D ; XID_Continue # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
-1C80..1C88 ; XID_Continue # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C8A ; XID_Continue # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE
1C90..1CBA ; XID_Continue # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
1CBD..1CBF ; XID_Continue # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
1CD0..1CD2 ; XID_Continue # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
@@ -9850,6 +9871,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA
105A3..105B1 ; XID_Continue # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
105B3..105B9 ; XID_Continue # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
105BB..105BC ; XID_Continue # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
+105C0..105F3 ; XID_Continue # Lo [52] TODHRI LETTER A..TODHRI LETTER OO
10600..10736 ; XID_Continue # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
10740..10755 ; XID_Continue # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
10760..10767 ; XID_Continue # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807
@@ -9897,7 +9919,8 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA
10E80..10EA9 ; XID_Continue # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
10EAB..10EAC ; XID_Continue # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
10EB0..10EB1 ; XID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
-10EFD..10EFF ; XID_Continue # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
+10EC2..10EC4 ; XID_Continue # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
+10EFC..10EFF ; XID_Continue # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA
10F00..10F1C ; XID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
10F27 ; XID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH
10F30..10F45 ; XID_Continue # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
@@ -10044,6 +10067,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA
116B7 ; XID_Continue # Mn TAKRI SIGN NUKTA
116B8 ; XID_Continue # Lo TAKRI LETTER ARCHAIC KHA
116C0..116C9 ; XID_Continue # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
+116D0..116E3 ; XID_Continue # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE
11700..1171A ; XID_Continue # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA
1171D..1171F ; XID_Continue # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA
11720..11721 ; XID_Continue # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA
@@ -10103,6 +10127,8 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA
11A98..11A99 ; XID_Continue # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER
11A9D ; XID_Continue # Lo SOYOMBO MARK PLUTA
11AB0..11AF8 ; XID_Continue # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL
+11BC0..11BE0 ; XID_Continue # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
+11BF0..11BF9 ; XID_Continue # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE
11C00..11C08 ; XID_Continue # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
11C0A..11C2E ; XID_Continue # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
11C2F ; XID_Continue # Mc BHAIKSUKI VOWEL SIGN AA
@@ -10333,7 +10359,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA
31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
-# Total code points: 140089
+# Total code points: 140213
# ================================================
@@ -10418,7 +10444,7 @@ E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] ......
-# Total code points: 10491
+# Total code points: 10492
# ================================================
@@ -9652,6 +9661,7 @@ E01F0..E0FFF ; NFKC_CF; # Cn [3600] ......
-# Total code points: 10453
+# Total code points: 10454
# ================================================
@@ -15411,7 +15421,7 @@ E01F0..E0FFF ; NFKC_SCF; # Cn [3600] ......
-# Total code points: 10491
+# Total code points: 10492
# EOF
diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt
index eaff179a1b..99c1d2cc7e 100644
--- a/unicodetools/data/ucd/dev/EastAsianWidth.txt
+++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt
@@ -1,5 +1,5 @@
# EastAsianWidth-16.0.0.txt
-# Date: 2023-10-02, 13:27:39 GMT
+# Date: 2023-10-17, 12:29:07 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -334,7 +338,7 @@
0888 ; N # Sk ARABIC RAISED ROUND DOT
0889..088E ; N # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
0890..0891 ; N # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
-0898..089F ; N # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+0897..089F ; N # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
08A0..08C8 ; N # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
08C9 ; N # Lm ARABIC SMALL FARSI YEH
08CA..08E1 ; N # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
@@ -502,7 +506,7 @@
0C4A..0C4D ; N # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
0C55..0C56 ; N # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
0C58..0C5A ; N # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
-0C5D ; N # Lo TELUGU LETTER NAKAARA POLLU
+0C5C..0C5D ; N # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU
0C60..0C61 ; N # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
0C62..0C63 ; N # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
0C66..0C6F ; N # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
@@ -528,7 +532,7 @@
0CCA..0CCB ; N # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
0CCC..0CCD ; N # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
0CD5..0CD6 ; N # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
-0CDD..0CDE ; N # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA
+0CDC..0CDE ; N # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA
0CE0..0CE1 ; N # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0CE2..0CE3 ; N # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
0CE6..0CEF ; N # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
@@ -859,7 +863,7 @@
1C5A..1C77 ; N # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
1C78..1C7D ; N # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
1C7E..1C7F ; N # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
-1C80..1C88 ; N # Ll [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C8A ; N # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE
1C90..1CBA ; N # Lu [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
1CBD..1CBF ; N # Lu [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
1CC0..1CC7 ; N # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA
@@ -1870,6 +1874,7 @@ FFFD ; A # So REPLACEMENT CHARACTER
105A3..105B1 ; N # Ll [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
105B3..105B9 ; N # Ll [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
105BB..105BC ; N # Ll [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
+105C0..105F3 ; N # Lo [52] TODHRI LETTER A..TODHRI LETTER OO
10600..10736 ; N # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
10740..10755 ; N # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
10760..10767 ; N # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807
@@ -1947,7 +1952,8 @@ FFFD ; A # So REPLACEMENT CHARACTER
10EAB..10EAC ; N # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
10EAD ; N # Pd YEZIDI HYPHENATION MARK
10EB0..10EB1 ; N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
-10EFD..10EFF ; N # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
+10EC2..10EC4 ; N # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
+10EFC..10EFF ; N # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA
10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
10F1D..10F26 ; N # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF
10F27 ; N # Lo OLD SOGDIAN LIGATURE AYIN-DALETH
@@ -2123,6 +2129,7 @@ FFFD ; A # So REPLACEMENT CHARACTER
116B8 ; N # Lo TAKRI LETTER ARCHAIC KHA
116B9 ; N # Po TAKRI ABBREVIATION SIGN
116C0..116C9 ; N # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
+116D0..116E3 ; N # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE
11700..1171A ; N # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA
1171D..1171F ; N # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA
11720..11721 ; N # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA
@@ -2195,6 +2202,9 @@ FFFD ; A # So REPLACEMENT CHARACTER
11AB0..11ABF ; N # Lo [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA
11AC0..11AF8 ; N # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL
11B00..11B09 ; N # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU
+11BC0..11BE0 ; N # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
+11BE1 ; N # Po SUNUWAR SIGN PVO
+11BF0..11BF9 ; N # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE
11C00..11C08 ; N # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
11C0A..11C2E ; N # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
11C2F ; N # Mc BHAIKSUKI VOWEL SIGN AA
diff --git a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt
index a7c5aef607..9b5aabfa0a 100644
--- a/unicodetools/data/ucd/dev/IndicPositionalCategory.txt
+++ b/unicodetools/data/ucd/dev/IndicPositionalCategory.txt
@@ -1,11 +1,11 @@
-# IndicPositionalCategory-15.1.0.txt
-# Date: 2023-01-05
+# IndicPositionalCategory-16.0.0.txt
+# Date: 2023-10-02, 22:58:33 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
-# For documentation, see UAX #44: Unicode Character Database,
-# at https://www.unicode.org/reports/tr44/
+# Unicode Character Database
+# For documentation, see https://www.unicode.org/reports/tr44/
#
# This file defines the following property:
#
diff --git a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt
index f2623b4714..5de0d7554a 100644
--- a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt
+++ b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt
@@ -1,11 +1,11 @@
-# IndicSyllabicCategory-15.1.0.txt
-# Date: 2023-01-05
+# IndicSyllabicCategory-16.0.0.txt
+# Date: 2023-10-02, 22:58:33 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
-# For documentation, see UAX #44: Unicode Character Database,
-# at https://www.unicode.org/reports/tr44/
+# Unicode Character Database
+# For documentation, see https://www.unicode.org/reports/tr44/
#
# This file defines the following property:
#
@@ -1335,7 +1335,7 @@ ABF0..ABF9 ; Number # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NI
# script, e.g. in Brahmi)
#
# Note: These are different from Numbers, in the way that there is no known
-# evidence of Brahmi Joining Numbers taking vowels or subjoined consonants.
+# evidence of Brahmi Joining Numbers taking vowels or subjoined consonants.
# Until such evidence is found, implementations may assume that Brahmi
# Joining Numbers only participate in shaping with other Brahmi Joining
# Numbers.
diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt
index c7ee51f2e8..401552ccaf 100644
--- a/unicodetools/data/ucd/dev/LineBreak.txt
+++ b/unicodetools/data/ucd/dev/LineBreak.txt
@@ -1,5 +1,9 @@
# LineBreak-16.0.0.txt
+<<<<<<< HEAD
# Date: 2023-10-23, 10:06:47 GMT
+=======
+# Date: 2023-10-16, 14:22:28 GMT
+>>>>>>> la-vache/main
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -280,7 +284,7 @@
0888 ; AL # Sk ARABIC RAISED ROUND DOT
0889..088E ; AL # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
0890..0891 ; NU # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
-0898..089F ; CM # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+0897..089F ; CM # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
08A0..08C8 ; AL # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
08C9 ; AL # Lm ARABIC SMALL FARSI YEH
08CA..08E1 ; CM # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
@@ -449,7 +453,7 @@
0C4A..0C4D ; CM # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
0C55..0C56 ; CM # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
0C58..0C5A ; AL # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
-0C5D ; AL # Lo TELUGU LETTER NAKAARA POLLU
+0C5C..0C5D ; AL # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU
0C60..0C61 ; AL # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
0C62..0C63 ; CM # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
0C66..0C6F ; NU # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
@@ -475,7 +479,7 @@
0CCA..0CCB ; CM # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
0CCC..0CCD ; CM # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
0CD5..0CD6 ; CM # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
-0CDD..0CDE ; AL # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA
+0CDC..0CDE ; AL # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA
0CE0..0CE1 ; AL # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0CE2..0CE3 ; CM # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
0CE6..0CEF ; NU # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
@@ -832,7 +836,7 @@
1C5A..1C77 ; AL # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
1C78..1C7D ; AL # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
1C7E..1C7F ; BA # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
-1C80..1C88 ; AL # Ll [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C8A ; AL # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE
1C90..1CBA ; AL # Lu [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
1CBD..1CBF ; AL # Lu [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
1CC0..1CC7 ; AL # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA
@@ -2721,6 +2725,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER
105A3..105B1 ; AL # Ll [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
105B3..105B9 ; AL # Ll [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
105BB..105BC ; AL # Ll [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
+105C0..105F3 ; AL # Lo [52] TODHRI LETTER A..TODHRI LETTER OO
10600..10736 ; AL # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
10740..10755 ; AL # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
10760..10767 ; AL # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807
@@ -2800,7 +2805,8 @@ FFFD ; AI # So REPLACEMENT CHARACTER
10EAB..10EAC ; CM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
10EAD ; BA # Pd YEZIDI HYPHENATION MARK
10EB0..10EB1 ; AL # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
-10EFD..10EFF ; CM # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
+10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
+10EFC..10EFF ; CM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA
10F00..10F1C ; AL # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
10F1D..10F26 ; AL # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF
10F27 ; AL # Lo OLD SOGDIAN LIGATURE AYIN-DALETH
@@ -2994,6 +3000,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER
116B8 ; AL # Lo TAKRI LETTER ARCHAIC KHA
116B9 ; AL # Po TAKRI ABBREVIATION SIGN
116C0..116C9 ; NU # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
+116D0..116E3 ; NU # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE
11700..1171A ; SA # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA
1171D..1171F ; SA # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA
11720..11721 ; SA # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA
@@ -3071,6 +3078,9 @@ FFFD ; AI # So REPLACEMENT CHARACTER
11AB0..11ABF ; AL # Lo [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA
11AC0..11AF8 ; AL # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL
11B00..11B09 ; BB # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU
+11BC0..11BE0 ; AL # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
+11BE1 ; AL # Po SUNUWAR SIGN PVO
+11BF0..11BF9 ; NU # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE
11C00..11C08 ; AL # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
11C0A..11C2E ; AL # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
11C2F ; CM # Mc BHAIKSUKI VOWEL SIGN AA
diff --git a/unicodetools/data/ucd/dev/NormalizationTest.txt b/unicodetools/data/ucd/dev/NormalizationTest.txt
index 2e88574243..5a4e3fc79a 100644
--- a/unicodetools/data/ucd/dev/NormalizationTest.txt
+++ b/unicodetools/data/ucd/dev/NormalizationTest.txt
@@ -1,5 +1,5 @@
-# NormalizationTest-15.1.0.txt
-# Date: 2023-01-05, 20:34:44 GMT
+# NormalizationTest-16.0.0.txt
+# Date: 2023-10-09, 15:12:35 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -15130,6 +15130,8 @@ FFEB;FFEB;FFEB;2192;2192; # (→; →; →; →; →; ) HALFWIDTH RIGHTWARDS ARR
FFEC;FFEC;FFEC;2193;2193; # (↓; ↓; ↓; ↓; ↓; ) HALFWIDTH DOWNWARDS ARROW
FFED;FFED;FFED;25A0;25A0; # (■; ■; ■; ■; ■; ) HALFWIDTH BLACK SQUARE
FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE
+105C9;105C9;105D2 0307;105C9;105D2 0307; # (; ; ◌̇; ; ◌̇; ) TODHRI LETTER EI
+105E4;105E4;105DA 0307;105E4;105DA 0307; # (; ; ◌̇; ; ◌̇; ) TODHRI LETTER U
10781;10781;10781;02D0;02D0; # (𐞁; 𐞁; 𐞁; ː; ː; ) MODIFIER LETTER SUPERSCRIPT TRIANGULAR COLON
10782;10782;10782;02D1;02D1; # (𐞂; 𐞂; 𐞂; ˑ; ˑ; ) MODIFIER LETTER SUPERSCRIPT HALF TRIANGULAR COLON
10783;10783;10783;00E6;00E6; # (𐞃; 𐞃; 𐞃; æ; æ; ) MODIFIER LETTER SMALL AE
@@ -17664,6 +17666,8 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE
0061 085A 059A 0316 1DFA 0062;0061 1DFA 085A 0316 059A 0062;0061 1DFA 085A 0316 059A 0062;0061 1DFA 085A 0316 059A 0062;0061 1DFA 085A 0316 059A 0062; # (a◌࡚◌֚◌̖◌᷺b; a◌᷺◌࡚◌̖◌֚b; a◌᷺◌࡚◌̖◌֚b; a◌᷺◌࡚◌̖◌֚b; a◌᷺◌࡚◌̖◌֚b; ) LATIN SMALL LETTER A, MANDAIC VOCALIZATION MARK, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B
0061 059A 0316 1DFA 085B 0062;0061 1DFA 0316 085B 059A 0062;0061 1DFA 0316 085B 059A 0062;0061 1DFA 0316 085B 059A 0062;0061 1DFA 0316 085B 059A 0062; # (a◌֚◌̖◌᷺◌࡛b; a◌᷺◌̖◌࡛◌֚b; a◌᷺◌̖◌࡛◌֚b; a◌᷺◌̖◌࡛◌֚b; a◌᷺◌̖◌࡛◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, MANDAIC GEMINATION MARK, LATIN SMALL LETTER B
0061 085B 059A 0316 1DFA 0062;0061 1DFA 085B 0316 059A 0062;0061 1DFA 085B 0316 059A 0062;0061 1DFA 085B 0316 059A 0062;0061 1DFA 085B 0316 059A 0062; # (a◌࡛◌֚◌̖◌᷺b; a◌᷺◌࡛◌̖◌֚b; a◌᷺◌࡛◌̖◌֚b; a◌᷺◌࡛◌̖◌֚b; a◌᷺◌࡛◌̖◌֚b; ) LATIN SMALL LETTER A, MANDAIC GEMINATION MARK, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, LATIN SMALL LETTER B
+0061 0315 0300 05AE 0897 0062;00E0 05AE 0897 0315 0062;0061 05AE 0300 0897 0315 0062;00E0 05AE 0897 0315 0062;0061 05AE 0300 0897 0315 0062; # (a◌̕◌̀◌֮◌b; à◌֮◌◌̕b; a◌֮◌̀◌◌̕b; à◌֮◌◌̕b; a◌֮◌̀◌◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, ARABIC PEPET, LATIN SMALL LETTER B
+0061 0897 0315 0300 05AE 0062;0061 05AE 0897 0300 0315 0062;0061 05AE 0897 0300 0315 0062;0061 05AE 0897 0300 0315 0062;0061 05AE 0897 0300 0315 0062; # (a◌◌̕◌̀◌֮b; a◌֮◌◌̀◌̕b; a◌֮◌◌̀◌̕b; a◌֮◌◌̀◌̕b; a◌֮◌◌̀◌̕b; ) LATIN SMALL LETTER A, ARABIC PEPET, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B
0061 0315 0300 05AE 0898 0062;00E0 05AE 0898 0315 0062;0061 05AE 0300 0898 0315 0062;00E0 05AE 0898 0315 0062;0061 05AE 0300 0898 0315 0062; # (a◌̕◌̀◌֮◌࢘b; à◌֮◌࢘◌̕b; a◌֮◌̀◌࢘◌̕b; à◌֮◌࢘◌̕b; a◌֮◌̀◌࢘◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, ARABIC SMALL HIGH WORD AL-JUZ, LATIN SMALL LETTER B
0061 0898 0315 0300 05AE 0062;0061 05AE 0898 0300 0315 0062;0061 05AE 0898 0300 0315 0062;0061 05AE 0898 0300 0315 0062;0061 05AE 0898 0300 0315 0062; # (a◌࢘◌̕◌̀◌֮b; a◌֮◌࢘◌̀◌̕b; a◌֮◌࢘◌̀◌̕b; a◌֮◌࢘◌̀◌̕b; a◌֮◌࢘◌̀◌̕b; ) LATIN SMALL LETTER A, ARABIC SMALL HIGH WORD AL-JUZ, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B
0061 059A 0316 1DFA 0899 0062;0061 1DFA 0316 0899 059A 0062;0061 1DFA 0316 0899 059A 0062;0061 1DFA 0316 0899 059A 0062;0061 1DFA 0316 0899 059A 0062; # (a◌֚◌̖◌᷺◌࢙b; a◌᷺◌̖◌࢙◌֚b; a◌᷺◌̖◌࢙◌֚b; a◌᷺◌̖◌࢙◌֚b; a◌᷺◌̖◌࢙◌֚b; ) LATIN SMALL LETTER A, HEBREW ACCENT YETIV, COMBINING GRAVE ACCENT BELOW, COMBINING DOT BELOW LEFT, ARABIC SMALL LOW WORD ISHMAAM, LATIN SMALL LETTER B
diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt
index 777e8a2881..8c1f3934d0 100644
--- a/unicodetools/data/ucd/dev/PropList.txt
+++ b/unicodetools/data/ucd/dev/PropList.txt
@@ -1,5 +1,5 @@
-# PropList-15.1.0.txt
-# Date: 2023-08-01, 21:56:53 GMT
+# PropList-16.0.0.txt
+# Date: 2023-10-13, 11:33:44 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -450,6 +450,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L
081B..0823 ; Other_Alphabetic # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
0825..0827 ; Other_Alphabetic # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0829..082C ; Other_Alphabetic # Mn [4] SAMARITAN VOWEL SIGN LONG I..SAMARITAN VOWEL SIGN SUKUN
+0897 ; Other_Alphabetic # Mn ARABIC PEPET
08D4..08DF ; Other_Alphabetic # Mn [12] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH WORD WAQFA
08E3..08E9 ; Other_Alphabetic # Mn [7] ARABIC TURNED DAMMA BELOW..ARABIC CURLY KASRATAN
08F0..0902 ; Other_Alphabetic # Mn [19] ARABIC OPEN FATHATAN..DEVANAGARI SIGN ANUSVARA
@@ -690,6 +691,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
10A0C..10A0F ; Other_Alphabetic # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA
10D24..10D27 ; Other_Alphabetic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
10EAB..10EAC ; Other_Alphabetic # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
+10EFC ; Other_Alphabetic # Mn ARABIC COMBINING ALEF OVERLAY
11000 ; Other_Alphabetic # Mc BRAHMI SIGN CANDRABINDU
11001 ; Other_Alphabetic # Mn BRAHMI SIGN ANUSVARA
11002 ; Other_Alphabetic # Mc BRAHMI SIGN VISARGA
@@ -834,7 +836,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA
1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z
-# Total code points: 1425
+# Total code points: 1427
# ================================================
diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt
index 1f0fc0e3d1..4679ed0857 100644
--- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt
+++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt
@@ -1,5 +1,9 @@
# PropertyValueAliases-16.0.0.txt
+<<<<<<< HEAD
# Date: 2023-10-02, 13:27:46 GMT
+=======
+# Date: 2023-10-17, 12:29:15 GMT
+>>>>>>> la-vache/main
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -368,6 +372,7 @@ blk; Music ; Musical_Symbols
blk; Myanmar ; Myanmar
blk; Myanmar_Ext_A ; Myanmar_Extended_A
blk; Myanmar_Ext_B ; Myanmar_Extended_B
+blk; Myanmar_Ext_C ; Myanmar_Extended_C
blk; Nabataean ; Nabataean
blk; Nag_Mundari ; Nag_Mundari
blk; Nandinagari ; Nandinagari
@@ -426,6 +431,7 @@ blk; Soyombo ; Soyombo
blk; Specials ; Specials
blk; Sundanese ; Sundanese
blk; Sundanese_Sup ; Sundanese_Supplement
+blk; Sunuwar ; Sunuwar
blk; Sup_Arrows_A ; Supplemental_Arrows_A
blk; Sup_Arrows_B ; Supplemental_Arrows_B
blk; Sup_Arrows_C ; Supplemental_Arrows_C
@@ -462,6 +468,7 @@ blk; Thai ; Thai
blk; Tibetan ; Tibetan
blk; Tifinagh ; Tifinagh
blk; Tirhuta ; Tirhuta
+blk; Todhri ; Todhri
blk; Toto ; Toto
blk; Transport_And_Map ; Transport_And_Map_Symbols
blk; UCAS ; Unified_Canadian_Aboriginal_Syllabics; Canadian_Syllabics
@@ -1425,6 +1432,7 @@ sc ; Sogo ; Old_Sogdian
sc ; Sora ; Sora_Sompeng
sc ; Soyo ; Soyombo
sc ; Sund ; Sundanese
+sc ; Sunu ; Sunuwar
sc ; Sylo ; Syloti_Nagri
sc ; Syrc ; Syriac
sc ; Tagb ; Tagbanwa
@@ -1442,6 +1450,7 @@ sc ; Thai ; Thai
sc ; Tibt ; Tibetan
sc ; Tirh ; Tirhuta
sc ; Tnsa ; Tangsa
+sc ; Todr ; Todhri
sc ; Toto ; Toto
sc ; Ugar ; Ugaritic
sc ; Vaii ; Vai
diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt
index bc986275f4..7025aea00e 100644
--- a/unicodetools/data/ucd/dev/Scripts.txt
+++ b/unicodetools/data/ucd/dev/Scripts.txt
@@ -1,5 +1,9 @@
# Scripts-16.0.0.txt
+<<<<<<< HEAD
# Date: 2023-10-02, 13:28:01 GMT
+=======
+# Date: 2023-10-17, 12:29:34 GMT
+>>>>>>> la-vache/main
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -770,7 +774,7 @@ AB65 ; Greek # L& GREEK LETTER SMALL CAPITAL OMEGA
0487 ; Cyrillic # Mn COMBINING CYRILLIC POKRYTIE
0488..0489 ; Cyrillic # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN
048A..052F ; Cyrillic # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER
-1C80..1C88 ; Cyrillic # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C8A ; Cyrillic # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE
1D2B ; Cyrillic # L& CYRILLIC LETTER SMALL CAPITAL EL
1D78 ; Cyrillic # Lm MODIFIER LETTER CYRILLIC EN
2DE0..2DFF ; Cyrillic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
@@ -789,7 +793,7 @@ FE2E..FE2F ; Cyrillic # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBININ
1E030..1E06D ; Cyrillic # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE
1E08F ; Cyrillic # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
-# Total code points: 506
+# Total code points: 508
# ================================================
@@ -869,7 +873,7 @@ FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATU
0888 ; Arabic # Sk ARABIC RAISED ROUND DOT
0889..088E ; Arabic # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
0890..0891 ; Arabic # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
-0898..089F ; Arabic # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+0897..089F ; Arabic # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
08A0..08C8 ; Arabic # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
08C9 ; Arabic # Lm ARABIC SMALL FARSI YEH
08CA..08E1 ; Arabic # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
@@ -887,7 +891,8 @@ FDFD..FDFF ; Arabic # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM.
FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM
FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
-10EFD..10EFF ; Arabic # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
+10EC2..10EC4 ; Arabic # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
+10EFC..10EFF ; Arabic # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA
1EE00..1EE03 ; Arabic # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL
1EE05..1EE1F ; Arabic # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF
1EE21..1EE22 ; Arabic # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM
@@ -923,7 +928,7 @@ FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LA
1EEAB..1EEBB ; Arabic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL
-# Total code points: 1368
+# Total code points: 1373
# ================================================
@@ -1152,7 +1157,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
0C4A..0C4D ; Telugu # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
0C55..0C56 ; Telugu # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
0C58..0C5A ; Telugu # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
-0C5D ; Telugu # Lo TELUGU LETTER NAKAARA POLLU
+0C5C..0C5D ; Telugu # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU
0C60..0C61 ; Telugu # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
0C62..0C63 ; Telugu # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
0C66..0C6F ; Telugu # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
@@ -1160,7 +1165,7 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
0C78..0C7E ; Telugu # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR
0C7F ; Telugu # So TELUGU SIGN TUUMU
-# Total code points: 100
+# Total code points: 101
# ================================================
@@ -1183,14 +1188,14 @@ A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY
0CCA..0CCB ; Kannada # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
0CCC..0CCD ; Kannada # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
0CD5..0CD6 ; Kannada # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
-0CDD..0CDE ; Kannada # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA
+0CDC..0CDE ; Kannada # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA
0CE0..0CE1 ; Kannada # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0CE2..0CE3 ; Kannada # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
0CE6..0CEF ; Kannada # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
0CF1..0CF2 ; Kannada # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
0CF3 ; Kannada # Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT
-# Total code points: 91
+# Total code points: 92
# ================================================
@@ -1370,8 +1375,9 @@ AA7B ; Myanmar # Mc MYANMAR SIGN PAO KAREN TONE
AA7C ; Myanmar # Mn MYANMAR SIGN TAI LAING TONE-2
AA7D ; Myanmar # Mc MYANMAR SIGN TAI LAING TONE-5
AA7E..AA7F ; Myanmar # Lo [2] MYANMAR LETTER SHWE PALAUNG CHA..MYANMAR LETTER SHWE PALAUNG SHA
+116D0..116E3 ; Myanmar # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE
-# Total code points: 223
+# Total code points: 243
# ================================================
@@ -3031,4 +3037,18 @@ ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DI
# Total code points: 42
+# ================================================
+
+11BC0..11BE0 ; Sunuwar # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
+11BE1 ; Sunuwar # Po SUNUWAR SIGN PVO
+11BF0..11BF9 ; Sunuwar # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE
+
+# Total code points: 44
+
+# ================================================
+
+105C0..105F3 ; Todhri # Lo [52] TODHRI LETTER A..TODHRI LETTER OO
+
+# Total code points: 52
+
# EOF
diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt
index 24867811e6..f04fa52fdf 100644
--- a/unicodetools/data/ucd/dev/UnicodeData.txt
+++ b/unicodetools/data/ucd/dev/UnicodeData.txt
@@ -2123,6 +2123,7 @@
088E;ARABIC VERTICAL TAIL;Lo;0;AL;;;;;N;;;;;
0890;ARABIC POUND MARK ABOVE;Cf;0;AN;;;;;N;;;;;
0891;ARABIC PIASTRE MARK ABOVE;Cf;0;AN;;;;;N;;;;;
+0897;ARABIC PEPET;Mn;230;NSM;;;;;N;;;;;
0898;ARABIC SMALL HIGH WORD AL-JUZ;Mn;230;NSM;;;;;N;;;;;
0899;ARABIC SMALL LOW WORD ISHMAAM;Mn;220;NSM;;;;;N;;;;;
089A;ARABIC SMALL LOW WORD IMAALA;Mn;220;NSM;;;;;N;;;;;
@@ -2861,6 +2862,7 @@
0C58;TELUGU LETTER TSA;Lo;0;L;;;;;N;;;;;
0C59;TELUGU LETTER DZA;Lo;0;L;;;;;N;;;;;
0C5A;TELUGU LETTER RRRA;Lo;0;L;;;;;N;;;;;
+0C5C;TELUGU ARCHAIC SHRII;Lo;0;L;;;;;N;;;;;
0C5D;TELUGU LETTER NAKAARA POLLU;Lo;0;L;;;;;N;;;;;
0C60;TELUGU LETTER VOCALIC RR;Lo;0;L;;;;;N;;;;;
0C61;TELUGU LETTER VOCALIC LL;Lo;0;L;;;;;N;;;;;
@@ -2957,6 +2959,7 @@
0CCD;KANNADA SIGN VIRAMA;Mn;9;NSM;;;;;N;;;;;
0CD5;KANNADA LENGTH MARK;Mc;0;L;;;;;N;;;;;
0CD6;KANNADA AI LENGTH MARK;Mc;0;L;;;;;N;;;;;
+0CDC;KANNADA ARCHAIC SHRII;Lo;0;L;;;;;N;;;;;
0CDD;KANNADA LETTER NAKAARA POLLU;Lo;0;L;;;;;N;;;;;
0CDE;KANNADA LETTER FA;Lo;0;L;;;;;N;;;;;
0CE0;KANNADA LETTER VOCALIC RR;Lo;0;L;;;;;N;;;;;
@@ -6511,6 +6514,8 @@
1C86;CYRILLIC SMALL LETTER TALL HARD SIGN;Ll;0;L;;;;;N;;;042A;;042A
1C87;CYRILLIC SMALL LETTER TALL YAT;Ll;0;L;;;;;N;;;0462;;0462
1C88;CYRILLIC SMALL LETTER UNBLENDED UK;Ll;0;L;;;;;N;;;A64A;;A64A
+1C89;CYRILLIC CAPITAL LETTER TJE;Lu;0;L;;;;;N;;;;1C8A;
+1C8A;CYRILLIC SMALL LETTER TJE;Ll;0;L;;;;;N;;;1C89;;1C89
1C90;GEORGIAN MTAVRULI CAPITAL LETTER AN;Lu;0;L;;;;;N;;;;10D0;
1C91;GEORGIAN MTAVRULI CAPITAL LETTER BAN;Lu;0;L;;;;;N;;;;10D1;
1C92;GEORGIAN MTAVRULI CAPITAL LETTER GAN;Lu;0;L;;;;;N;;;;10D2;
@@ -18001,6 +18006,58 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
105B9;VITHKUQI SMALL LETTER XE;Ll;0;L;;;;;N;;;10592;;10592
105BB;VITHKUQI SMALL LETTER Y;Ll;0;L;;;;;N;;;10594;;10594
105BC;VITHKUQI SMALL LETTER ZE;Ll;0;L;;;;;N;;;10595;;10595
+105C0;TODHRI LETTER A;Lo;0;L;;;;;N;;;;;
+105C1;TODHRI LETTER AS;Lo;0;L;;;;;N;;;;;
+105C2;TODHRI LETTER BA;Lo;0;L;;;;;N;;;;;
+105C3;TODHRI LETTER MBA;Lo;0;L;;;;;N;;;;;
+105C4;TODHRI LETTER CA;Lo;0;L;;;;;N;;;;;
+105C5;TODHRI LETTER CHA;Lo;0;L;;;;;N;;;;;
+105C6;TODHRI LETTER DA;Lo;0;L;;;;;N;;;;;
+105C7;TODHRI LETTER NDA;Lo;0;L;;;;;N;;;;;
+105C8;TODHRI LETTER DHA;Lo;0;L;;;;;N;;;;;
+105C9;TODHRI LETTER EI;Lo;0;L;105D2 0307;;;;N;;;;;
+105CA;TODHRI LETTER E;Lo;0;L;;;;;N;;;;;
+105CB;TODHRI LETTER FA;Lo;0;L;;;;;N;;;;;
+105CC;TODHRI LETTER GA;Lo;0;L;;;;;N;;;;;
+105CD;TODHRI LETTER NGA;Lo;0;L;;;;;N;;;;;
+105CE;TODHRI LETTER GJA;Lo;0;L;;;;;N;;;;;
+105CF;TODHRI LETTER NGJA;Lo;0;L;;;;;N;;;;;
+105D0;TODHRI LETTER HA;Lo;0;L;;;;;N;;;;;
+105D1;TODHRI LETTER HJA;Lo;0;L;;;;;N;;;;;
+105D2;TODHRI LETTER I;Lo;0;L;;;;;N;;;;;
+105D3;TODHRI LETTER JA;Lo;0;L;;;;;N;;;;;
+105D4;TODHRI LETTER KA;Lo;0;L;;;;;N;;;;;
+105D5;TODHRI LETTER LA;Lo;0;L;;;;;N;;;;;
+105D6;TODHRI LETTER LLA;Lo;0;L;;;;;N;;;;;
+105D7;TODHRI LETTER MA;Lo;0;L;;;;;N;;;;;
+105D8;TODHRI LETTER NA;Lo;0;L;;;;;N;;;;;
+105D9;TODHRI LETTER NJAN;Lo;0;L;;;;;N;;;;;
+105DA;TODHRI LETTER O;Lo;0;L;;;;;N;;;;;
+105DB;TODHRI LETTER PA;Lo;0;L;;;;;N;;;;;
+105DC;TODHRI LETTER QA;Lo;0;L;;;;;N;;;;;
+105DD;TODHRI LETTER RA;Lo;0;L;;;;;N;;;;;
+105DE;TODHRI LETTER RRA;Lo;0;L;;;;;N;;;;;
+105DF;TODHRI LETTER SA;Lo;0;L;;;;;N;;;;;
+105E0;TODHRI LETTER SHA;Lo;0;L;;;;;N;;;;;
+105E1;TODHRI LETTER SHTA;Lo;0;L;;;;;N;;;;;
+105E2;TODHRI LETTER TA;Lo;0;L;;;;;N;;;;;
+105E3;TODHRI LETTER THA;Lo;0;L;;;;;N;;;;;
+105E4;TODHRI LETTER U;Lo;0;L;105DA 0307;;;;N;;;;;
+105E5;TODHRI LETTER VA;Lo;0;L;;;;;N;;;;;
+105E6;TODHRI LETTER XA;Lo;0;L;;;;;N;;;;;
+105E7;TODHRI LETTER NXA;Lo;0;L;;;;;N;;;;;
+105E8;TODHRI LETTER XHA;Lo;0;L;;;;;N;;;;;
+105E9;TODHRI LETTER NXHA;Lo;0;L;;;;;N;;;;;
+105EA;TODHRI LETTER Y;Lo;0;L;;;;;N;;;;;
+105EB;TODHRI LETTER JY;Lo;0;L;;;;;N;;;;;
+105EC;TODHRI LETTER ZA;Lo;0;L;;;;;N;;;;;
+105ED;TODHRI LETTER ZHA;Lo;0;L;;;;;N;;;;;
+105EE;TODHRI LETTER GHA;Lo;0;L;;;;;N;;;;;
+105EF;TODHRI LETTER STA;Lo;0;L;;;;;N;;;;;
+105F0;TODHRI LETTER SKAN;Lo;0;L;;;;;N;;;;;
+105F1;TODHRI LETTER KHA;Lo;0;L;;;;;N;;;;;
+105F2;TODHRI LETTER PSA;Lo;0;L;;;;;N;;;;;
+105F3;TODHRI LETTER OO;Lo;0;L;;;;;N;;;;;
10600;LINEAR A SIGN AB001;Lo;0;L;;;;;N;;;;;
10601;LINEAR A SIGN AB002;Lo;0;L;;;;;N;;;;;
10602;LINEAR A SIGN AB003;Lo;0;L;;;;;N;;;;;
@@ -19400,6 +19457,10 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
10EAD;YEZIDI HYPHENATION MARK;Pd;0;R;;;;;N;;;;;
10EB0;YEZIDI LETTER LAM WITH DOT ABOVE;Lo;0;R;;;;;N;;;;;
10EB1;YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE;Lo;0;R;;;;;N;;;;;
+10EC2;ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;;
+10EC3;ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;;
+10EC4;ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW;Lo;0;AL;;;;;N;;;;;
+10EFC;ARABIC COMBINING ALEF OVERLAY;Mn;0;NSM;;;;;N;;;;;
10EFD;ARABIC SMALL LOW WORD SAKTA;Mn;220;NSM;;;;;N;;;;;
10EFE;ARABIC SMALL LOW WORD QASR;Mn;220;NSM;;;;;N;;;;;
10EFF;ARABIC SMALL LOW WORD MADDA;Mn;220;NSM;;;;;N;;;;;
@@ -20695,6 +20756,26 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
116C7;TAKRI DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;;
116C8;TAKRI DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;;
116C9;TAKRI DIGIT NINE;Nd;0;L;;9;9;9;N;;;;;
+116D0;MYANMAR PAO DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;;
+116D1;MYANMAR PAO DIGIT ONE;Nd;0;L;;1;1;1;N;;;;;
+116D2;MYANMAR PAO DIGIT TWO;Nd;0;L;;2;2;2;N;;;;;
+116D3;MYANMAR PAO DIGIT THREE;Nd;0;L;;3;3;3;N;;;;;
+116D4;MYANMAR PAO DIGIT FOUR;Nd;0;L;;4;4;4;N;;;;;
+116D5;MYANMAR PAO DIGIT FIVE;Nd;0;L;;5;5;5;N;;;;;
+116D6;MYANMAR PAO DIGIT SIX;Nd;0;L;;6;6;6;N;;;;;
+116D7;MYANMAR PAO DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;;
+116D8;MYANMAR PAO DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;;
+116D9;MYANMAR PAO DIGIT NINE;Nd;0;L;;9;9;9;N;;;;;
+116DA;MYANMAR EASTERN PWO KAREN DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;;
+116DB;MYANMAR EASTERN PWO KAREN DIGIT ONE;Nd;0;L;;1;1;1;N;;;;;
+116DC;MYANMAR EASTERN PWO KAREN DIGIT TWO;Nd;0;L;;2;2;2;N;;;;;
+116DD;MYANMAR EASTERN PWO KAREN DIGIT THREE;Nd;0;L;;3;3;3;N;;;;;
+116DE;MYANMAR EASTERN PWO KAREN DIGIT FOUR;Nd;0;L;;4;4;4;N;;;;;
+116DF;MYANMAR EASTERN PWO KAREN DIGIT FIVE;Nd;0;L;;5;5;5;N;;;;;
+116E0;MYANMAR EASTERN PWO KAREN DIGIT SIX;Nd;0;L;;6;6;6;N;;;;;
+116E1;MYANMAR EASTERN PWO KAREN DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;;
+116E2;MYANMAR EASTERN PWO KAREN DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;;
+116E3;MYANMAR EASTERN PWO KAREN DIGIT NINE;Nd;0;L;;9;9;9;N;;;;;
11700;AHOM LETTER KA;Lo;0;L;;;;;N;;;;;
11701;AHOM LETTER KHA;Lo;0;L;;;;;N;;;;;
11702;AHOM LETTER NGA;Lo;0;L;;;;;N;;;;;
@@ -21279,6 +21360,50 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;;
11B07;DEVANAGARI SIGN WESTERN NINE-LIKE BHALE;Po;0;L;;;;;N;;;;;
11B08;DEVANAGARI SIGN REVERSED NINE-LIKE BHALE;Po;0;L;;;;;N;;;;;
11B09;DEVANAGARI SIGN MINDU;Po;0;L;;;;;N;;;;;
+11BC0;SUNUWAR LETTER DEVI;Lo;0;L;;;;;N;;;;;
+11BC1;SUNUWAR LETTER TASLA;Lo;0;L;;;;;N;;;;;
+11BC2;SUNUWAR LETTER EKO;Lo;0;L;;;;;N;;;;;
+11BC3;SUNUWAR LETTER IMAR;Lo;0;L;;;;;N;;;;;
+11BC4;SUNUWAR LETTER REU;Lo;0;L;;;;;N;;;;;
+11BC5;SUNUWAR LETTER UTTHI;Lo;0;L;;;;;N;;;;;
+11BC6;SUNUWAR LETTER KIK;Lo;0;L;;;;;N;;;;;
+11BC7;SUNUWAR LETTER MA;Lo;0;L;;;;;N;;;;;
+11BC8;SUNUWAR LETTER APPHO;Lo;0;L;;;;;N;;;;;
+11BC9;SUNUWAR LETTER PIP;Lo;0;L;;;;;N;;;;;
+11BCA;SUNUWAR LETTER GIL;Lo;0;L;;;;;N;;;;;
+11BCB;SUNUWAR LETTER HAMSO;Lo;0;L;;;;;N;;;;;
+11BCC;SUNUWAR LETTER CARMI;Lo;0;L;;;;;N;;;;;
+11BCD;SUNUWAR LETTER NAH;Lo;0;L;;;;;N;;;;;
+11BCE;SUNUWAR LETTER BUR;Lo;0;L;;;;;N;;;;;
+11BCF;SUNUWAR LETTER JYAH;Lo;0;L;;;;;N;;;;;
+11BD0;SUNUWAR LETTER LOACHA;Lo;0;L;;;;;N;;;;;
+11BD1;SUNUWAR LETTER OTTHI;Lo;0;L;;;;;N;;;;;
+11BD2;SUNUWAR LETTER SHYELE;Lo;0;L;;;;;N;;;;;
+11BD3;SUNUWAR LETTER VARCA;Lo;0;L;;;;;N;;;;;
+11BD4;SUNUWAR LETTER YAT;Lo;0;L;;;;;N;;;;;
+11BD5;SUNUWAR LETTER AVA;Lo;0;L;;;;;N;;;;;
+11BD6;SUNUWAR LETTER AAL;Lo;0;L;;;;;N;;;;;
+11BD7;SUNUWAR LETTER DONGA;Lo;0;L;;;;;N;;;;;
+11BD8;SUNUWAR LETTER THARI;Lo;0;L;;;;;N;;;;;
+11BD9;SUNUWAR LETTER PHAR;Lo;0;L;;;;;N;;;;;
+11BDA;SUNUWAR LETTER NGAR;Lo;0;L;;;;;N;;;;;
+11BDB;SUNUWAR LETTER KHA;Lo;0;L;;;;;N;;;;;
+11BDC;SUNUWAR LETTER SHYER;Lo;0;L;;;;;N;;;;;
+11BDD;SUNUWAR LETTER CHELAP;Lo;0;L;;;;;N;;;;;
+11BDE;SUNUWAR LETTER TENTU;Lo;0;L;;;;;N;;;;;
+11BDF;SUNUWAR LETTER THELE;Lo;0;L;;;;;N;;;;;
+11BE0;SUNUWAR LETTER KLOKO;Lo;0;L;;;;;N;;;;;
+11BE1;SUNUWAR SIGN PVO;Po;0;L;;;;;N;;;;;
+11BF0;SUNUWAR DIGIT ZERO;Nd;0;L;;0;0;0;N;;;;;
+11BF1;SUNUWAR DIGIT ONE;Nd;0;L;;1;1;1;N;;;;;
+11BF2;SUNUWAR DIGIT TWO;Nd;0;L;;2;2;2;N;;;;;
+11BF3;SUNUWAR DIGIT THREE;Nd;0;L;;3;3;3;N;;;;;
+11BF4;SUNUWAR DIGIT FOUR;Nd;0;L;;4;4;4;N;;;;;
+11BF5;SUNUWAR DIGIT FIVE;Nd;0;L;;5;5;5;N;;;;;
+11BF6;SUNUWAR DIGIT SIX;Nd;0;L;;6;6;6;N;;;;;
+11BF7;SUNUWAR DIGIT SEVEN;Nd;0;L;;7;7;7;N;;;;;
+11BF8;SUNUWAR DIGIT EIGHT;Nd;0;L;;8;8;8;N;;;;;
+11BF9;SUNUWAR DIGIT NINE;Nd;0;L;;9;9;9;N;;;;;
11C00;BHAIKSUKI LETTER A;Lo;0;L;;;;;N;;;;;
11C01;BHAIKSUKI LETTER AA;Lo;0;L;;;;;N;;;;;
11C02;BHAIKSUKI LETTER I;Lo;0;L;;;;;N;;;;;
diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt
index 4fb2f1c596..25bf84501c 100644
--- a/unicodetools/data/ucd/dev/VerticalOrientation.txt
+++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt
@@ -1,5 +1,5 @@
# VerticalOrientation-16.0.0.txt
-# Date: 2023-10-02, 13:28:05 GMT
+# Date: 2023-10-17, 12:29:38 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -268,7 +272,7 @@
0888 ; R # Sk ARABIC RAISED ROUND DOT
0889..088E ; R # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
0890..0891 ; R # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
-0898..089F ; R # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+0897..089F ; R # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
08A0..08C8 ; R # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
08C9 ; R # Lm ARABIC SMALL FARSI YEH
08CA..08E1 ; R # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
@@ -436,7 +440,7 @@
0C4A..0C4D ; R # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
0C55..0C56 ; R # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
0C58..0C5A ; R # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
-0C5D ; R # Lo TELUGU LETTER NAKAARA POLLU
+0C5C..0C5D ; R # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU
0C60..0C61 ; R # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
0C62..0C63 ; R # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
0C66..0C6F ; R # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
@@ -462,7 +466,7 @@
0CCA..0CCB ; R # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
0CCC..0CCD ; R # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
0CD5..0CD6 ; R # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
-0CDD..0CDE ; R # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA
+0CDC..0CDE ; R # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA
0CE0..0CE1 ; R # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0CE2..0CE3 ; R # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
0CE6..0CEF ; R # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
@@ -793,7 +797,7 @@
1C5A..1C77 ; R # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
1C78..1C7D ; R # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
1C7E..1C7F ; R # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
-1C80..1C88 ; R # Ll [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C8A ; R # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE
1C90..1CBA ; R # Lu [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
1CBD..1CBF ; R # Lu [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
1CC0..1CC7 ; R # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA
@@ -1694,6 +1698,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA
105A3..105B1 ; R # Ll [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
105B3..105B9 ; R # Ll [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
105BB..105BC ; R # Ll [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
+105C0..105F3 ; R # Lo [52] TODHRI LETTER A..TODHRI LETTER OO
10600..10736 ; R # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
10740..10755 ; R # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
10760..10767 ; R # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807
@@ -1771,7 +1776,8 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA
10EAB..10EAC ; R # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
10EAD ; R # Pd YEZIDI HYPHENATION MARK
10EB0..10EB1 ; R # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
-10EFD..10EFF ; R # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
+10EC2..10EC4 ; R # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
+10EFC..10EFF ; R # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA
10F00..10F1C ; R # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
10F1D..10F26 ; R # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF
10F27 ; R # Lo OLD SOGDIAN LIGATURE AYIN-DALETH
@@ -1949,6 +1955,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA
116B8 ; R # Lo TAKRI LETTER ARCHAIC KHA
116B9 ; R # Po TAKRI ABBREVIATION SIGN
116C0..116C9 ; R # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
+116D0..116E3 ; R # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE
11700..1171A ; R # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA
1171D..1171F ; R # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA
11720..11721 ; R # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA
@@ -2023,6 +2030,9 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA
11AB0..11ABF ; U # Lo [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA
11AC0..11AF8 ; R # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL
11B00..11B09 ; R # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU
+11BC0..11BE0 ; R # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
+11BE1 ; R # Po SUNUWAR SIGN PVO
+11BF0..11BF9 ; R # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE
11C00..11C08 ; R # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
11C0A..11C2E ; R # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
11C2F ; R # Mc BHAIKSUKI VOWEL SIGN AA
diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt
index 12453cbdb5..797d5c000d 100644
--- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt
+++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt
@@ -1,5 +1,5 @@
-# GraphemeBreakProperty-15.1.0.txt
-# Date: 2023-01-05, 20:34:41 GMT
+# GraphemeBreakProperty-16.0.0.txt
+# Date: 2023-10-13, 11:29:23 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -106,7 +106,7 @@ E01F0..E0FFF ; Control # Cn [3600] ..
0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
-0898..089F ; Extend # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+0897..089F ; Extend # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
08CA..08E1 ; Extend # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
08E3..0902 ; Extend # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA
093A ; Extend # Mn DEVANAGARI VOWEL SIGN OE
@@ -325,7 +325,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
10AE5..10AE6 ; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
-10EFD..10EFF ; Extend # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
+10EFC..10EFF ; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA
10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
11001 ; Extend # Mn BRAHMI SIGN ANUSVARA
@@ -459,7 +459,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG
E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
-# Total code points: 2130
+# Total code points: 2132
# ================================================
diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt
index f848af1d11..54c8be3db5 100644
--- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt
+++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt
@@ -1,5 +1,5 @@
-# SentenceBreakProperty-15.1.0.txt
-# Date: 2023-07-28, 23:34:37 GMT
+# SentenceBreakProperty-16.0.0.txt
+# Date: 2023-10-17, 12:29:35 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -55,7 +55,7 @@
0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
-0898..089F ; Extend # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+0897..089F ; Extend # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
08CA..08E1 ; Extend # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
08E3..0902 ; Extend # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA
0903 ; Extend # Mc DEVANAGARI SIGN VISARGA
@@ -372,7 +372,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
10AE5..10AE6 ; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
-10EFD..10EFF ; Extend # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
+10EFC..10EFF ; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA
10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
11000 ; Extend # Mc BRAHMI SIGN CANDRABINDU
@@ -567,7 +567,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG
E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
-# Total code points: 2550
+# Total code points: 2552
# ================================================
@@ -892,6 +892,7 @@ E0001 ; Format # Cf LANGUAGE TAG
10FC ; Lower # Lm MODIFIER LETTER GEORGIAN NAR
13F8..13FD ; Lower # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV
1C80..1C88 ; Lower # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C8A ; Lower # L& CYRILLIC SMALL LETTER TJE
1D00..1D2B ; Lower # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL
1D2C..1D6A ; Lower # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI
1D6B..1D77 ; Lower # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G
@@ -1297,7 +1298,7 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN
1E030..1E06D ; Lower # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE
1E922..1E943 ; Lower # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA
-# Total code points: 2497
+# Total code points: 2498
# ================================================
@@ -1577,6 +1578,7 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN
10C7 ; Upper # L& GEORGIAN CAPITAL LETTER YN
10CD ; Upper # L& GEORGIAN CAPITAL LETTER AEN
13A0..13F5 ; Upper # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV
+1C89 ; Upper # L& CYRILLIC CAPITAL LETTER TJE
1E00 ; Upper # L& LATIN CAPITAL LETTER A WITH RING BELOW
1E02 ; Upper # L& LATIN CAPITAL LETTER B WITH DOT ABOVE
1E04 ; Upper # L& LATIN CAPITAL LETTER B WITH DOT BELOW
@@ -1954,7 +1956,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT
1F150..1F169 ; Upper # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
1F170..1F189 ; Upper # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z
-# Total code points: 1936
+# Total code points: 1937
# ================================================
@@ -2062,7 +2064,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT
0C2A..0C39 ; OLetter # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA
0C3D ; OLetter # Lo TELUGU SIGN AVAGRAHA
0C58..0C5A ; OLetter # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
-0C5D ; OLetter # Lo TELUGU LETTER NAKAARA POLLU
+0C5C..0C5D ; OLetter # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU
0C60..0C61 ; OLetter # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
0C80 ; OLetter # Lo KANNADA SIGN SPACING CANDRABINDU
0C85..0C8C ; OLetter # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
@@ -2071,7 +2073,7 @@ FF21..FF3A ; Upper # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LAT
0CAA..0CB3 ; OLetter # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA
0CB5..0CB9 ; OLetter # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA
0CBD ; OLetter # Lo KANNADA SIGN AVAGRAHA
-0CDD..0CDE ; OLetter # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA
+0CDC..0CDE ; OLetter # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA
0CE0..0CE1 ; OLetter # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0CF1..0CF2 ; OLetter # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
0D04..0D0C ; OLetter # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
@@ -2321,6 +2323,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
10450..1049D ; OLetter # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO
10500..10527 ; OLetter # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE
10530..10563 ; OLetter # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW
+105C0..105F3 ; OLetter # Lo [52] TODHRI LETTER A..TODHRI LETTER OO
10600..10736 ; OLetter # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
10740..10755 ; OLetter # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
10760..10767 ; OLetter # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807
@@ -2355,6 +2358,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
10D00..10D23 ; OLetter # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
10E80..10EA9 ; OLetter # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
10EB0..10EB1 ; OLetter # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
+10EC2..10EC4 ; OLetter # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
10F00..10F1C ; OLetter # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
10F27 ; OLetter # Lo OLD SOGDIAN LIGATURE AYIN-DALETH
10F30..10F45 ; OLetter # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
@@ -2426,6 +2430,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
11A5C..11A89 ; OLetter # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA
11A9D ; OLetter # Lo SOYOMBO MARK PLUTA
11AB0..11AF8 ; OLetter # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL
+11BC0..11BE0 ; OLetter # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
11C00..11C08 ; OLetter # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
11C0A..11C2E ; OLetter # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
11C40 ; OLetter # Lo BHAIKSUKI SIGN AVAGRAHA
@@ -2536,7 +2541,7 @@ FFDA..FFDC ; OLetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
30000..3134A ; OLetter # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; OLetter # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
-# Total code points: 132658
+# Total code points: 132748
# ================================================
@@ -2595,9 +2600,11 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
114D0..114D9 ; Numeric # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE
11650..11659 ; Numeric # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE
116C0..116C9 ; Numeric # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
+116D0..116E3 ; Numeric # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE
11730..11739 ; Numeric # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE
118E0..118E9 ; Numeric # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE
11950..11959 ; Numeric # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE
+11BF0..11BF9 ; Numeric # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE
11C50..11C59 ; Numeric # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE
11D50..11D59 ; Numeric # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE
11DA0..11DA9 ; Numeric # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE
@@ -2612,7 +2619,7 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
1E950..1E959 ; Numeric # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
1FBF0..1FBF9 ; Numeric # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
-# Total code points: 694
+# Total code points: 724
# ================================================
diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt
index 302a2769b3..f7b1418eae 100644
--- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt
+++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt
@@ -1,5 +1,5 @@
-# WordBreakProperty-15.1.0.txt
-# Date: 2023-03-31, 03:19:05 GMT
+# WordBreakProperty-16.0.0.txt
+# Date: 2023-10-17, 12:29:39 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -91,7 +91,7 @@ FB46..FB4F ; Hebrew_Letter # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW
0825..0827 ; Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0829..082D ; Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
0859..085B ; Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
-0898..089F ; Extend # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+0897..089F ; Extend # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
08CA..08E1 ; Extend # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
08E3..0902 ; Extend # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA
0903 ; Extend # Mc DEVANAGARI SIGN VISARGA
@@ -408,7 +408,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
10AE5..10AE6 ; Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
10D24..10D27 ; Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
10EAB..10EAC ; Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
-10EFD..10EFF ; Extend # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
+10EFC..10EFF ; Extend # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA
10F46..10F50 ; Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
10F82..10F85 ; Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
11000 ; Extend # Mc BRAHMI SIGN CANDRABINDU
@@ -604,7 +604,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT
E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG
E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
-# Total code points: 2554
+# Total code points: 2556
# ================================================
@@ -794,7 +794,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
0C2A..0C39 ; ALetter # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA
0C3D ; ALetter # Lo TELUGU SIGN AVAGRAHA
0C58..0C5A ; ALetter # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
-0C5D ; ALetter # Lo TELUGU LETTER NAKAARA POLLU
+0C5C..0C5D ; ALetter # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU
0C60..0C61 ; ALetter # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
0C80 ; ALetter # Lo KANNADA SIGN SPACING CANDRABINDU
0C85..0C8C ; ALetter # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
@@ -803,7 +803,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
0CAA..0CB3 ; ALetter # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA
0CB5..0CB9 ; ALetter # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA
0CBD ; ALetter # Lo KANNADA SIGN AVAGRAHA
-0CDD..0CDE ; ALetter # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA
+0CDC..0CDE ; ALetter # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA
0CE0..0CE1 ; ALetter # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0CF1..0CF2 ; ALetter # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
0D04..0D0C ; ALetter # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
@@ -877,7 +877,7 @@ FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAK
1C4D..1C4F ; ALetter # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA
1C5A..1C77 ; ALetter # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
1C78..1C7D ; ALetter # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
-1C80..1C88 ; ALetter # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C8A ; ALetter # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE
1C90..1CBA ; ALetter # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
1CBD..1CBF ; ALetter # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
1CE9..1CEC ; ALetter # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL
@@ -1077,6 +1077,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
105A3..105B1 ; ALetter # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
105B3..105B9 ; ALetter # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
105BB..105BC ; ALetter # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
+105C0..105F3 ; ALetter # Lo [52] TODHRI LETTER A..TODHRI LETTER OO
10600..10736 ; ALetter # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
10740..10755 ; ALetter # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
10760..10767 ; ALetter # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807
@@ -1115,6 +1116,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
10D00..10D23 ; ALetter # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
10E80..10EA9 ; ALetter # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
10EB0..10EB1 ; ALetter # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
+10EC2..10EC4 ; ALetter # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
10F00..10F1C ; ALetter # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
10F27 ; ALetter # Lo OLD SOGDIAN LIGATURE AYIN-DALETH
10F30..10F45 ; ALetter # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
@@ -1185,6 +1187,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
11A5C..11A89 ; ALetter # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA
11A9D ; ALetter # Lo SOYOMBO MARK PLUTA
11AB0..11AF8 ; ALetter # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL
+11BC0..11BE0 ; ALetter # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
11C00..11C08 ; ALetter # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
11C0A..11C2E ; ALetter # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
11C40 ; ALetter # Lo BHAIKSUKI SIGN AVAGRAHA
@@ -1313,7 +1316,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL
1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z
1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z
-# Total code points: 29490
+# Total code points: 29582
# ================================================
@@ -1417,9 +1420,11 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
114D0..114D9 ; Numeric # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE
11650..11659 ; Numeric # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE
116C0..116C9 ; Numeric # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
+116D0..116E3 ; Numeric # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE
11730..11739 ; Numeric # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE
118E0..118E9 ; Numeric # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE
11950..11959 ; Numeric # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE
+11BF0..11BF9 ; Numeric # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE
11C50..11C59 ; Numeric # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE
11D50..11D59 ; Numeric # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE
11DA0..11DA9 ; Numeric # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE
@@ -1434,7 +1439,7 @@ FF10..FF19 ; Numeric # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
1E950..1E959 ; Numeric # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
1FBF0..1FBF9 ; Numeric # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
-# Total code points: 693
+# Total code points: 723
# ================================================
diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt
index ab3c835ef9..f075a60381 100644
--- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt
+++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt
@@ -1,5 +1,5 @@
# DerivedBidiClass-16.0.0.txt
-# Date: 2023-10-02, 13:27:30 GMT
+# Date: 2023-10-17, 12:28:55 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -272,7 +272,7 @@
0C3D ; L # Lo TELUGU SIGN AVAGRAHA
0C41..0C44 ; L # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR
0C58..0C5A ; L # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
-0C5D ; L # Lo TELUGU LETTER NAKAARA POLLU
+0C5C..0C5D ; L # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU
0C60..0C61 ; L # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
0C66..0C6F ; L # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
0C77 ; L # Po TELUGU SIGN SIDDHAM
@@ -293,7 +293,7 @@
0CC7..0CC8 ; L # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI
0CCA..0CCB ; L # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
0CD5..0CD6 ; L # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
-0CDD..0CDE ; L # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA
+0CDC..0CDE ; L # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA
0CE0..0CE1 ; L # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0CE6..0CEF ; L # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
0CF1..0CF2 ; L # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
@@ -517,7 +517,7 @@
1C5A..1C77 ; L # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
1C78..1C7D ; L # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
1C7E..1C7F ; L # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
-1C80..1C88 ; L # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C8A ; L # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE
1C90..1CBA ; L # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
1CBD..1CBF ; L # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
1CC0..1CC7 ; L # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA
@@ -821,6 +821,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER
105A3..105B1 ; L # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
105B3..105B9 ; L # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
105BB..105BC ; L # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
+105C0..105F3 ; L # Lo [52] TODHRI LETTER A..TODHRI LETTER OO
10600..10736 ; L # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
10740..10755 ; L # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
10760..10767 ; L # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807
@@ -939,6 +940,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER
116B8 ; L # Lo TAKRI LETTER ARCHAIC KHA
116B9 ; L # Po TAKRI ABBREVIATION SIGN
116C0..116C9 ; L # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
+116D0..116E3 ; L # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE
11700..1171A ; L # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA
11720..11721 ; L # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA
11726 ; L # Mc AHOM VOWEL SIGN E
@@ -991,6 +993,9 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER
11A9E..11AA2 ; L # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2
11AB0..11AF8 ; L # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL
11B00..11B09 ; L # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU
+11BC0..11BE0 ; L # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
+11BE1 ; L # Po SUNUWAR SIGN PVO
+11BF0..11BF9 ; L # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE
11C00..11C08 ; L # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
11C0A..11C2E ; L # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
11C2F ; L # Mc BHAIKSUKI VOWEL SIGN AA
@@ -1182,8 +1187,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER
F0000..FFFFD ; L # Co [65534] ..
100000..10FFFD; L # Co [65534] ..
-# The above property value applies to 820457 code points not listed here.
-# Total code points: 1096262
+# The above property value applies to 820342 code points not listed here.
+# Total code points: 1096267
# ================================================
@@ -1899,7 +1904,6 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE
11FD5..11FDC ; ON # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI
11FE1..11FF1 ; ON # So [17] TAMIL SIGN PAARAM..TAMIL SIGN VAKAIYARAA
16FE2 ; ON # Po OLD CHINESE HOOK MARK
-1CEB0..1CEB3 ; ON # So [4] HORIZONTAL ZIGZAG LINE..BLACK RIGHT TRIANGLE CARET
1D1E9..1D1EA ; ON # So [2] MUSICAL SYMBOL SORI..MUSICAL SYMBOL KORON
1D200..1D241 ; ON # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54
1D245 ; ON # So GREEK MUSICAL LEIMMA
@@ -1936,7 +1940,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE
1F850..1F859 ; ON # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
1F860..1F887 ; ON # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
1F890..1F8AD ; ON # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
-1F8B0..1F8B2 ; ON # So [3] ARROW POINTING UPWARDS THEN NORTH WEST..RIGHTWARDS ARROW WITH LOWER HOOK
+1F8B0..1F8B1 ; ON # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST
1F900..1FA53 ; ON # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP
1FA60..1FA6D ; ON # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER
1FA70..1FA7C ; ON # So [13] BALLET SHOES..CRUTCH
@@ -1949,7 +1953,7 @@ FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTE
1FB00..1FB92 ; ON # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
1FB94..1FBCA ; ON # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON
-# Total code points: 6039
+# Total code points: 6034
# ================================================
@@ -2024,7 +2028,7 @@ FFFFE..FFFFF ; BN # Cn [2] ..
0825..0827 ; NSM # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0829..082D ; NSM # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
0859..085B ; NSM # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
-0898..089F ; NSM # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+0897..089F ; NSM # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
08CA..08E1 ; NSM # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
08E3..0902 ; NSM # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA
093A ; NSM # Mn DEVANAGARI VOWEL SIGN OE
@@ -2225,7 +2229,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC
10AE5..10AE6 ; NSM # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
10D24..10D27 ; NSM # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
10EAB..10EAC ; NSM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
-10EFD..10EFF ; NSM # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
+10EFC..10EFF ; NSM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA
10F46..10F50 ; NSM # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
10F82..10F85 ; NSM # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
11001 ; NSM # Mn BRAHMI SIGN ANUSVARA
@@ -2349,7 +2353,7 @@ FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC
1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
-# Total code points: 1993
+# Total code points: 1995
# ================================================
@@ -2396,6 +2400,7 @@ FDFC ; AL # Sc RIAL SIGN
FE70..FE74 ; AL # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM
FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
10D00..10D23 ; AL # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
+10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
10F30..10F45 ; AL # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
10F51..10F54 ; AL # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED
10F55..10F59 ; AL # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT
@@ -2441,8 +2446,8 @@ FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WI
1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH
1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
-# The above property value applies to 298 code points not listed here.
-# Total code points: 1769
+# The above property value applies to 293 code points not listed here.
+# Total code points: 1767
# ================================================
diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt
index 7c04d382db..639e4123c8 100644
--- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt
+++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt
@@ -1,5 +1,5 @@
# DerivedCombiningClass-16.0.0.txt
-# Date: 2023-10-02, 13:27:32 GMT
+# Date: 2023-10-17, 12:28:58 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -335,7 +335,7 @@
0C46..0C48 ; 0 # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
0C4A..0C4C ; 0 # Mn [3] TELUGU VOWEL SIGN O..TELUGU VOWEL SIGN AU
0C58..0C5A ; 0 # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
-0C5D ; 0 # Lo TELUGU LETTER NAKAARA POLLU
+0C5C..0C5D ; 0 # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU
0C60..0C61 ; 0 # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
0C62..0C63 ; 0 # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
0C66..0C6F ; 0 # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
@@ -360,7 +360,7 @@
0CCA..0CCB ; 0 # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
0CCC ; 0 # Mn KANNADA VOWEL SIGN AU
0CD5..0CD6 ; 0 # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
-0CDD..0CDE ; 0 # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA
+0CDC..0CDE ; 0 # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA
0CE0..0CE1 ; 0 # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0CE2..0CE3 ; 0 # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
0CE6..0CEF ; 0 # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
@@ -664,7 +664,7 @@
1C5A..1C77 ; 0 # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
1C78..1C7D ; 0 # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
1C7E..1C7F ; 0 # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
-1C80..1C88 ; 0 # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C8A ; 0 # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE
1C90..1CBA ; 0 # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
1CBD..1CBF ; 0 # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
1CC0..1CC7 ; 0 # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA
@@ -1392,6 +1392,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
105A3..105B1 ; 0 # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
105B3..105B9 ; 0 # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
105BB..105BC ; 0 # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
+105C0..105F3 ; 0 # Lo [52] TODHRI LETTER A..TODHRI LETTER OO
10600..10736 ; 0 # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
10740..10755 ; 0 # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
10760..10767 ; 0 # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807
@@ -1463,6 +1464,8 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
10E80..10EA9 ; 0 # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
10EAD ; 0 # Pd YEZIDI HYPHENATION MARK
10EB0..10EB1 ; 0 # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
+10EC2..10EC4 ; 0 # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
+10EFC ; 0 # Mn ARABIC COMBINING ALEF OVERLAY
10F00..10F1C ; 0 # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
10F1D..10F26 ; 0 # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF
10F27 ; 0 # Lo OLD SOGDIAN LIGATURE AYIN-DALETH
@@ -1622,6 +1625,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
116B8 ; 0 # Lo TAKRI LETTER ARCHAIC KHA
116B9 ; 0 # Po TAKRI ABBREVIATION SIGN
116C0..116C9 ; 0 # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
+116D0..116E3 ; 0 # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE
11700..1171A ; 0 # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA
1171D..1171F ; 0 # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA
11720..11721 ; 0 # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA
@@ -1687,6 +1691,9 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
11A9E..11AA2 ; 0 # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2
11AB0..11AF8 ; 0 # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL
11B00..11B09 ; 0 # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU
+11BC0..11BE0 ; 0 # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
+11BE1 ; 0 # Po SUNUWAR SIGN PVO
+11BF0..11BF9 ; 0 # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE
11C00..11C08 ; 0 # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
11C0A..11C2E ; 0 # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
11C2F ; 0 # Mc BHAIKSUKI VOWEL SIGN AA
@@ -1811,7 +1818,6 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
1BC9D ; 0 # Mn DUPLOYAN THICK LETTER SELECTOR
1BC9F ; 0 # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP
1BCA0..1BCA3 ; 0 # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
-1CEB0..1CEB3 ; 0 # So [4] HORIZONTAL ZIGZAG LINE..BLACK RIGHT TRIANGLE CARET
1CF00..1CF2D ; 0 # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT
1CF30..1CF46 ; 0 # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG
1CF50..1CFC3 ; 0 # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK
@@ -1978,7 +1984,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
1F850..1F859 ; 0 # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
1F860..1F887 ; 0 # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
1F890..1F8AD ; 0 # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
-1F8B0..1F8B2 ; 0 # So [3] ARROW POINTING UPWARDS THEN NORTH WEST..RIGHTWARDS ARROW WITH LOWER HOOK
+1F8B0..1F8B1 ; 0 # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST
1F900..1FA53 ; 0 # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP
1FA60..1FA6D ; 0 # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER
1FA70..1FA7C ; 0 # So [13] BALLET SHOES..CRUTCH
@@ -2006,8 +2012,8 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
F0000..FFFFD ; 0 # Co [65534] ..
100000..10FFFD; 0 # Co [65534] ..
-# The above property value applies to 826761 code points not listed here.
-# Total code points: 1113190
+# The above property value applies to 826641 code points not listed here.
+# Total code points: 1113189
# ================================================
@@ -2653,7 +2659,7 @@ FE27..FE2D ; 220 # Mn [7] COMBINING LIGATURE LEFT HALF BELOW..COMBINING CON
081B..0823 ; 230 # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A
0825..0827 ; 230 # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0829..082D ; 230 # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
-0898 ; 230 # Mn ARABIC SMALL HIGH WORD AL-JUZ
+0897..0898 ; 230 # Mn [2] ARABIC PEPET..ARABIC SMALL HIGH WORD AL-JUZ
089C..089F ; 230 # Mn [4] ARABIC MADDA WAAJIB..ARABIC HALF MADDA OVER MADDA
08CA..08CE ; 230 # Mn [5] ARABIC SMALL HIGH FARSI YEH..ARABIC LARGE ROUND DOT ABOVE
08D4..08E1 ; 230 # Mn [14] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH SIGN SAFHA
@@ -2742,7 +2748,7 @@ FE2E..FE2F ; 230 # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYR
1E4EF ; 230 # Mn NAG MUNDARI SIGN SUTUH
1E944..1E949 ; 230 # Mn [6] ADLAM ALIF LENGTHENER..ADLAM GEMINATE CONSONANT MODIFIER
-# Total code points: 510
+# Total code points: 511
# ================================================
diff --git a/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt b/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt
index 6bd9e8ee64..b1789d11fb 100644
--- a/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt
+++ b/unicodetools/data/ucd/dev/extracted/DerivedDecompositionType.txt
@@ -1,5 +1,5 @@
-# DerivedDecompositionType-15.1.0.txt
-# Date: 2023-01-05, 20:34:36 GMT
+# DerivedDecompositionType-16.0.0.txt
+# Date: 2023-10-16, 14:22:23 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -249,6 +249,8 @@ FB3E ; Canonical # Lo HEBREW LETTER MEM WITH DAGESH
FB40..FB41 ; Canonical # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH
FB43..FB44 ; Canonical # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH
FB46..FB4E ; Canonical # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LETTER PE WITH RAFE
+105C9 ; Canonical # Lo TODHRI LETTER EI
+105E4 ; Canonical # Lo TODHRI LETTER U
1109A ; Canonical # Lo KAITHI LETTER DDDHA
1109C ; Canonical # Lo KAITHI LETTER RHA
110AB ; Canonical # Lo KAITHI LETTER VA
@@ -262,7 +264,7 @@ FB46..FB4E ; Canonical # Lo [9] HEBREW LETTER TSADI WITH DAGESH..HEBREW LET
1D1BB..1D1C0 ; Canonical # So [6] MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK
2F800..2FA1D ; Canonical # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
-# Total code points: 13233
+# Total code points: 13235
# ================================================
diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt
index 57173ab545..99c7aabe0b 100644
--- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt
+++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt
@@ -1,5 +1,5 @@
# DerivedEastAsianWidth-16.0.0.txt
-# Date: 2023-10-02, 13:27:34 GMT
+# Date: 2023-10-17, 12:29:01 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -225,7 +225,7 @@
0888 ; N # Sk ARABIC RAISED ROUND DOT
0889..088E ; N # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL
0890..0891 ; N # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE
-0898..089F ; N # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+0897..089F ; N # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
08A0..08C8 ; N # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF
08C9 ; N # Lm ARABIC SMALL FARSI YEH
08CA..08E1 ; N # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
@@ -391,7 +391,7 @@
0C4A..0C4D ; N # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
0C55..0C56 ; N # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
0C58..0C5A ; N # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
-0C5D ; N # Lo TELUGU LETTER NAKAARA POLLU
+0C5C..0C5D ; N # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU
0C60..0C61 ; N # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
0C62..0C63 ; N # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL
0C66..0C6F ; N # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE
@@ -417,7 +417,7 @@
0CCA..0CCB ; N # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO
0CCC..0CCD ; N # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
0CD5..0CD6 ; N # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
-0CDD..0CDE ; N # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA
+0CDC..0CDE ; N # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA
0CE0..0CE1 ; N # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0CE2..0CE3 ; N # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
0CE6..0CEF ; N # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
@@ -743,7 +743,7 @@
1C5A..1C77 ; N # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
1C78..1C7D ; N # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
1C7E..1C7F ; N # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
-1C80..1C88 ; N # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C8A ; N # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE
1C90..1CBA ; N # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
1CBD..1CBF ; N # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
1CC0..1CC7 ; N # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA
@@ -1397,6 +1397,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER
105A3..105B1 ; N # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
105B3..105B9 ; N # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
105BB..105BC ; N # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
+105C0..105F3 ; N # Lo [52] TODHRI LETTER A..TODHRI LETTER OO
10600..10736 ; N # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
10740..10755 ; N # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
10760..10767 ; N # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807
@@ -1472,7 +1473,8 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER
10EAB..10EAC ; N # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
10EAD ; N # Pd YEZIDI HYPHENATION MARK
10EB0..10EB1 ; N # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
-10EFD..10EFF ; N # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
+10EC2..10EC4 ; N # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
+10EFC..10EFF ; N # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA
10F00..10F1C ; N # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
10F1D..10F26 ; N # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF
10F27 ; N # Lo OLD SOGDIAN LIGATURE AYIN-DALETH
@@ -1647,6 +1649,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER
116B8 ; N # Lo TAKRI LETTER ARCHAIC KHA
116B9 ; N # Po TAKRI ABBREVIATION SIGN
116C0..116C9 ; N # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
+116D0..116E3 ; N # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE
11700..1171A ; N # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA
1171D..1171F ; N # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA
11720..11721 ; N # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA
@@ -1717,6 +1720,9 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER
11A9E..11AA2 ; N # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2
11AB0..11AF8 ; N # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL
11B00..11B09 ; N # Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU
+11BC0..11BE0 ; N # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
+11BE1 ; N # Po SUNUWAR SIGN PVO
+11BF0..11BF9 ; N # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE
11C00..11C08 ; N # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
11C0A..11C2E ; N # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
11C2F ; N # Mc BHAIKSUKI VOWEL SIGN AA
@@ -1830,7 +1836,6 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER
1BC9D..1BC9E ; N # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK
1BC9F ; N # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP
1BCA0..1BCA3 ; N # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP
-1CEB0..1CEB3 ; N # So [4] HORIZONTAL ZIGZAG LINE..BLACK RIGHT TRIANGLE CARET
1CF00..1CF2D ; N # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT
1CF30..1CF46 ; N # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG
1CF50..1CFC3 ; N # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK
@@ -2032,7 +2037,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER
1F850..1F859 ; N # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
1F860..1F887 ; N # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
1F890..1F8AD ; N # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
-1F8B0..1F8B2 ; N # So [3] ARROW POINTING UPWARDS THEN NORTH WEST..RIGHTWARDS ARROW WITH LOWER HOOK
+1F8B0..1F8B1 ; N # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST
1F900..1F90B ; N # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT
1F93B ; N # So MODERN PENTATHLON
1F946 ; N # So RIFLE
@@ -2044,7 +2049,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER
E0001 ; N # Cf LANGUAGE TAG
E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG
-# The above property value applies to 766279 code points not listed here.
+# The above property value applies to 766159 code points not listed here.
# Total code points: 792618
# ================================================
diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt
index 70bb944b41..67eed4460f 100644
--- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt
+++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt
@@ -1,5 +1,5 @@
# DerivedGeneralCategory-16.0.0.txt
-# Date: 2023-10-02, 13:27:35 GMT
+# Date: 2023-10-17, 12:29:02 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -37,7 +37,7 @@
085F ; Cn #
086B..086F ; Cn # [5] ..
088F ; Cn #
-0892..0897 ; Cn # [6] ..
+0892..0896 ; Cn # [5] ..
0984 ; Cn #
098D..098E ; Cn # [2] ..
0991..0992 ; Cn # [2] ..
@@ -120,7 +120,7 @@
0C49 ; Cn #
0C4E..0C54 ; Cn # [7] ..
0C57 ; Cn #
-0C5B..0C5C ; Cn # [2] ..
+0C5B ; Cn #
0C5E..0C5F ; Cn # [2] ..
0C64..0C65 ; Cn # [2] ..
0C70..0C76 ; Cn # [7] ..
@@ -132,7 +132,7 @@
0CC5 ; Cn #
0CC9 ; Cn #
0CCE..0CD4 ; Cn # [7] ..
-0CD7..0CDC ; Cn # [6] ..
+0CD7..0CDB ; Cn # [5] ..
0CDF ; Cn #
0CE4..0CE5 ; Cn # [2] ..
0CF0 ; Cn #
@@ -234,7 +234,7 @@
1BF4..1BFB ; Cn # [8] ..
1C38..1C3A ; Cn # [3] ..
1C4A..1C4C ; Cn # [3] ..
-1C89..1C8F ; Cn # [7] ..
+1C8B..1C8F ; Cn # [5] ..
1CBB..1CBC ; Cn # [2] ..
1CC8..1CCF ; Cn # [8] ..
1CFB..1CFF ; Cn # [5] ..
@@ -388,7 +388,8 @@ FFFE..FFFF ; Cn # [2] ..
105A2 ; Cn #
105B2 ; Cn #
105BA ; Cn #
-105BD..105FF ; Cn # [67] ..
+105BD..105BF ; Cn # [3] ..
+105F4..105FF ; Cn # [12] ..
10737..1073F ; Cn # [9] ..
10756..1075F ; Cn # [10] ..
10768..1077F ; Cn # [24] ..
@@ -435,7 +436,8 @@ FFFE..FFFF ; Cn # [2] ..
10E7F ; Cn #
10EAA ; Cn #
10EAE..10EAF ; Cn # [2] ..
-10EB2..10EFC ; Cn # [75] ..
+10EB2..10EC1 ; Cn # [16] ..
+10EC5..10EFB ; Cn # [55] ..
10F28..10F2F ; Cn # [8] ..
10F5A..10F6F ; Cn # [22] ..
10F8A..10FAF ; Cn # [38] ..
@@ -486,7 +488,8 @@ FFFE..FFFF ; Cn # [2] ..
1165A..1165F ; Cn # [6] ..
1166D..1167F ; Cn # [19] ..
116BA..116BF ; Cn # [6] ..
-116CA..116FF ; Cn # [54] ..
+116CA..116CF ; Cn # [6] ..
+116E4..116FF ; Cn # [28] ..
1171B..1171C ; Cn # [2] ..
1172C..1172F ; Cn # [4] ..
11747..117FF ; Cn # [185] ..
@@ -506,7 +509,9 @@ FFFE..FFFF ; Cn # [2] ..
11A48..11A4F ; Cn # [8] ..
11AA3..11AAF ; Cn # [13] ..
11AF9..11AFF ; Cn # [7] ..
-11B0A..11BFF ; Cn # [246] ..
+11B0A..11BBF ; Cn # [182] ..
+11BE2..11BEF ; Cn # [14] ..
+11BFA..11BFF ; Cn # [6] ..
11C09 ; Cn #
11C37 ; Cn #
11C46..11C4F ; Cn # [10] ..
@@ -574,8 +579,7 @@ FFFE..FFFF ; Cn # [2] ..
1BC7D..1BC7F ; Cn # [3] ..
1BC89..1BC8F ; Cn # [7] ..
1BC9A..1BC9B ; Cn # [2] ..
-1BCA4..1CEAF ; Cn # [4620] ..
-1CEB4..1CEFF ; Cn # [76] ..
+1BCA4..1CEFF ; Cn # [4700] ..
1CF2E..1CF2F ; Cn # [2] ..
1CF47..1CF4F ; Cn # [9] ..
1CFC4..1CFFF ; Cn # [60] ..
@@ -696,7 +700,7 @@ FFFE..FFFF ; Cn # [2] ..
1F85A..1F85F ; Cn # [6] ..
1F888..1F88F ; Cn # [8] ..
1F8AE..1F8AF ; Cn # [2] ..
-1F8B3..1F8FF ; Cn # [77] ..
+1F8B2..1F8FF ; Cn # [78] ..
1FA54..1FA5F ; Cn # [12] ..
1FA6E..1FA6F ; Cn # [2] ..
1FA7D..1FA7F ; Cn # [3] ..
@@ -724,7 +728,7 @@ E01F0..EFFFF ; Cn # [65040] ..
FFFFE..FFFFF ; Cn # [2] ..
10FFFE..10FFFF; Cn # [2] ..
-# Total code points: 824713
+# Total code points: 824593
# ================================================
@@ -1006,6 +1010,7 @@ FFFFE..FFFFF ; Cn # [2] ..
10C7 ; Lu # GEORGIAN CAPITAL LETTER YN
10CD ; Lu # GEORGIAN CAPITAL LETTER AEN
13A0..13F5 ; Lu # [86] CHEROKEE LETTER A..CHEROKEE LETTER MV
+1C89 ; Lu # CYRILLIC CAPITAL LETTER TJE
1C90..1CBA ; Lu # [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
1CBD..1CBF ; Lu # [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
1E00 ; Lu # LATIN CAPITAL LETTER A WITH RING BELOW
@@ -1377,7 +1382,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP
1D7CA ; Lu # MATHEMATICAL BOLD CAPITAL DIGAMMA
1E900..1E921 ; Lu # [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA
-# Total code points: 1831
+# Total code points: 1832
# ================================================
@@ -1657,6 +1662,7 @@ FF21..FF3A ; Lu # [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAP
10FD..10FF ; Ll # [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN
13F8..13FD ; Ll # [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV
1C80..1C88 ; Ll # [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C8A ; Ll # CYRILLIC SMALL LETTER TJE
1D00..1D2B ; Ll # [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL
1D6B..1D77 ; Ll # [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G
1D79..1D9A ; Ll # [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK
@@ -2042,7 +2048,7 @@ FF41..FF5A ; Ll # [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL
1DF25..1DF2A ; Ll # [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK
1E922..1E943 ; Ll # [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA
-# Total code points: 2233
+# Total code points: 2234
# ================================================
@@ -2233,7 +2239,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK
0C2A..0C39 ; Lo # [16] TELUGU LETTER PA..TELUGU LETTER HA
0C3D ; Lo # TELUGU SIGN AVAGRAHA
0C58..0C5A ; Lo # [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
-0C5D ; Lo # TELUGU LETTER NAKAARA POLLU
+0C5C..0C5D ; Lo # [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU
0C60..0C61 ; Lo # [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
0C80 ; Lo # KANNADA SIGN SPACING CANDRABINDU
0C85..0C8C ; Lo # [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L
@@ -2242,7 +2248,7 @@ FF9E..FF9F ; Lm # [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAK
0CAA..0CB3 ; Lo # [10] KANNADA LETTER PA..KANNADA LETTER LLA
0CB5..0CB9 ; Lo # [5] KANNADA LETTER VA..KANNADA LETTER HA
0CBD ; Lo # KANNADA SIGN AVAGRAHA
-0CDD..0CDE ; Lo # [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA
+0CDC..0CDE ; Lo # [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA
0CE0..0CE1 ; Lo # [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0CF1..0CF2 ; Lo # [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
0D04..0D0C ; Lo # [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
@@ -2452,6 +2458,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
10450..1049D ; Lo # [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO
10500..10527 ; Lo # [40] ELBASAN LETTER A..ELBASAN LETTER KHE
10530..10563 ; Lo # [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW
+105C0..105F3 ; Lo # [52] TODHRI LETTER A..TODHRI LETTER OO
10600..10736 ; Lo # [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
10740..10755 ; Lo # [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
10760..10767 ; Lo # [8] LINEAR A SIGN A800..LINEAR A SIGN A807
@@ -2485,6 +2492,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
10D00..10D23 ; Lo # [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
10E80..10EA9 ; Lo # [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
10EB0..10EB1 ; Lo # [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
+10EC2..10EC4 ; Lo # [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
10F00..10F1C ; Lo # [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
10F27 ; Lo # OLD SOGDIAN LIGATURE AYIN-DALETH
10F30..10F45 ; Lo # [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
@@ -2556,6 +2564,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
11A5C..11A89 ; Lo # [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA
11A9D ; Lo # SOYOMBO MARK PLUTA
11AB0..11AF8 ; Lo # [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL
+11BC0..11BE0 ; Lo # [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
11C00..11C08 ; Lo # [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
11C0A..11C2E ; Lo # [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
11C40 ; Lo # BHAIKSUKI SIGN AVAGRAHA
@@ -2655,7 +2664,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
30000..3134A ; Lo # [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; Lo # [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
-# Total code points: 132234
+# Total code points: 132324
# ================================================
@@ -2685,7 +2694,7 @@ FFDA..FFDC ; Lo # [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
0825..0827 ; Mn # [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0829..082D ; Mn # [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
0859..085B ; Mn # [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
-0898..089F ; Mn # [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+0897..089F ; Mn # [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
08CA..08E1 ; Mn # [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
08E3..0902 ; Mn # [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA
093A ; Mn # DEVANAGARI VOWEL SIGN OE
@@ -2884,7 +2893,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL
10AE5..10AE6 ; Mn # [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
10D24..10D27 ; Mn # [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
10EAB..10EAC ; Mn # [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
-10EFD..10EFF ; Mn # [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
+10EFC..10EFF ; Mn # [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA
10F46..10F50 ; Mn # [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
10F82..10F85 ; Mn # [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
11001 ; Mn # BRAHMI SIGN ANUSVARA
@@ -3008,7 +3017,7 @@ FE20..FE2F ; Mn # [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITL
1E944..1E94A ; Mn # [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
E0100..E01EF ; Mn # [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
-# Total code points: 1985
+# Total code points: 1987
# ================================================
@@ -3263,9 +3272,11 @@ FF10..FF19 ; Nd # [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
114D0..114D9 ; Nd # [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE
11650..11659 ; Nd # [10] MODI DIGIT ZERO..MODI DIGIT NINE
116C0..116C9 ; Nd # [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
+116D0..116E3 ; Nd # [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE
11730..11739 ; Nd # [10] AHOM DIGIT ZERO..AHOM DIGIT NINE
118E0..118E9 ; Nd # [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE
11950..11959 ; Nd # [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE
+11BF0..11BF9 ; Nd # [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE
11C50..11C59 ; Nd # [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE
11D50..11D59 ; Nd # [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE
11DA0..11DA9 ; Nd # [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE
@@ -3280,7 +3291,7 @@ FF10..FF19 ; Nd # [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
1E950..1E959 ; Nd # [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
1FBF0..1FBF9 ; Nd # [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
-# Total code points: 680
+# Total code points: 710
# ================================================
@@ -3848,6 +3859,7 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL
11A9A..11A9C ; Po # [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD
11A9E..11AA2 ; Po # [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2
11B00..11B09 ; Po # [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU
+11BE1 ; Po # SUNUWAR SIGN PVO
11C41..11C45 ; Po # [5] BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2
11C70..11C71 ; Po # [2] MARCHEN HEAD MARK..MARCHEN MARK SHAD
11EF7..11EF8 ; Po # [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION
@@ -3865,7 +3877,7 @@ FF64..FF65 ; Po # [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDL
1DA87..1DA8B ; Po # [5] SIGNWRITING COMMA..SIGNWRITING PARENTHESIS
1E95E..1E95F ; Po # [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK
-# Total code points: 628
+# Total code points: 629
# ================================================
@@ -4137,7 +4149,6 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
16B3C..16B3F ; So # [4] PAHAWH HMONG SIGN XYEEM NTXIV..PAHAWH HMONG SIGN XYEEM FAIB
16B45 ; So # PAHAWH HMONG SIGN CIM TSOV ROG
1BC9C ; So # DUPLOYAN SIGN O WITH CROSS
-1CEB0..1CEB3 ; So # [4] HORIZONTAL ZIGZAG LINE..BLACK RIGHT TRIANGLE CARET
1CF50..1CFC3 ; So # [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK
1D000..1D0F5 ; So # [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO
1D100..1D126 ; So # [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2
@@ -4182,7 +4193,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
1F850..1F859 ; So # [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
1F860..1F887 ; So # [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
1F890..1F8AD ; So # [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
-1F8B0..1F8B2 ; So # [3] ARROW POINTING UPWARDS THEN NORTH WEST..RIGHTWARDS ARROW WITH LOWER HOOK
+1F8B0..1F8B1 ; So # [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST
1F900..1FA53 ; So # [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP
1FA60..1FA6D ; So # [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER
1FA70..1FA7C ; So # [13] BALLET SHOES..CRUTCH
@@ -4195,7 +4206,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER
1FB00..1FB92 ; So # [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
1FB94..1FBCA ; So # [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON
-# Total code points: 6644
+# Total code points: 6639
# ================================================
diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt
index 364847b91a..2589107ebd 100644
--- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt
+++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningGroup.txt
@@ -1,5 +1,5 @@
-# DerivedJoiningGroup-15.1.0.txt
-# Date: 2023-01-05, 20:34:37 GMT
+# DerivedJoiningGroup-16.0.0.txt
+# Date: 2023-10-02, 12:16:28 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -72,8 +72,9 @@
06EE ; Dal # Lo ARABIC LETTER DAL WITH INVERTED V
0759..075A ; Dal # Lo [2] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW AND SMALL TAH..ARABIC LETTER DAL WITH INVERTED SMALL V BELOW
08AE ; Dal # Lo ARABIC LETTER DAL WITH THREE DOTS BELOW
+10EC2 ; Dal # Lo ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW
-# Total code points: 15
+# Total code points: 16
# ================================================
@@ -177,8 +178,9 @@
06AC..06AE ; Kaf # Lo [3] ARABIC LETTER KAF WITH DOT ABOVE..ARABIC LETTER KAF WITH THREE DOTS BELOW
077F ; Kaf # Lo ARABIC LETTER KAF WITH TWO DOTS ABOVE
08B4 ; Kaf # Lo ARABIC LETTER KAF WITH DOT BELOW
+10EC4 ; Kaf # Lo ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
-# Total code points: 6
+# Total code points: 7
# ================================================
@@ -331,8 +333,9 @@
069F ; Tah # Lo ARABIC LETTER TAH WITH THREE DOTS ABOVE
088B..088C ; Tah # Lo [2] ARABIC LETTER TAH WITH DOT BELOW..ARABIC LETTER TAH WITH THREE DOTS BELOW
08A3 ; Tah # Lo ARABIC LETTER TAH WITH TWO DOTS ABOVE
+10EC3 ; Tah # Lo ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW
-# Total code points: 6
+# Total code points: 7
# ================================================
diff --git a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt
index a4e01e7d34..ea82c725e7 100644
--- a/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt
+++ b/unicodetools/data/ucd/dev/extracted/DerivedJoiningType.txt
@@ -1,5 +1,5 @@
-# DerivedJoiningType-15.1.0.txt
-# Date: 2023-01-05, 20:34:38 GMT
+# DerivedJoiningType-16.0.0.txt
+# Date: 2023-10-13, 11:29:21 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -95,6 +95,7 @@ A840..A871 ; D # Lo [50] PHAGS-PA LETTER KA..PHAGS-PA SUBJOINED LETTER RA
10BAD..10BAE ; D # No [2] PSALTER PAHLAVI NUMBER TEN..PSALTER PAHLAVI NUMBER TWENTY
10D01..10D21 ; D # Lo [33] HANIFI ROHINGYA LETTER BA..HANIFI ROHINGYA VOWEL O
10D23 ; D # Lo HANIFI ROHINGYA MARK NA KHONNA
+10EC3..10EC4 ; D # Lo [2] ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
10F30..10F32 ; D # Lo [3] SOGDIAN LETTER ALEPH..SOGDIAN LETTER GIMEL
10F34..10F44 ; D # Lo [17] SOGDIAN LETTER WAW..SOGDIAN LETTER LESH
10F51..10F53 ; D # No [3] SOGDIAN NUMBER ONE..SOGDIAN NUMBER TWENTY
@@ -110,7 +111,7 @@ A840..A871 ; D # Lo [50] PHAGS-PA LETTER KA..PHAGS-PA SUBJOINED LETTER RA
10FCA ; D # No CHORASMIAN NUMBER TWENTY
1E900..1E943 ; D # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA
-# Total code points: 610
+# Total code points: 612
# ================================================
@@ -173,6 +174,7 @@ A840..A871 ; D # Lo [50] PHAGS-PA LETTER KA..PHAGS-PA SUBJOINED LETTER RA
10B91 ; R # Lo PSALTER PAHLAVI LETTER TAW
10BA9..10BAC ; R # No [4] PSALTER PAHLAVI NUMBER ONE..PSALTER PAHLAVI NUMBER FOUR
10D22 ; R # Lo HANIFI ROHINGYA MARK SAKIN
+10EC2 ; R # Lo ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW
10F33 ; R # Lo SOGDIAN LETTER HE
10F54 ; R # No SOGDIAN NUMBER ONE HUNDRED
10F74..10F75 ; R # Lo [2] OLD UYGHUR LETTER ZAYIN..OLD UYGHUR LETTER FINAL HETH
@@ -182,7 +184,7 @@ A840..A871 ; D # Lo [50] PHAGS-PA LETTER KA..PHAGS-PA SUBJOINED LETTER RA
10FC2..10FC3 ; R # Lo [2] CHORASMIAN LETTER RESH..CHORASMIAN LETTER SHIN
10FC9 ; R # No CHORASMIAN NUMBER TEN
-# Total code points: 152
+# Total code points: 153
# ================================================
@@ -228,7 +230,7 @@ A872 ; L # Lo PHAGS-PA SUPERFIXED LETTER RA
0825..0827 ; T # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0829..082D ; T # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
0859..085B ; T # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
-0898..089F ; T # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+0897..089F ; T # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
08CA..08E1 ; T # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
08E3..0902 ; T # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA
093A ; T # Mn DEVANAGARI VOWEL SIGN OE
@@ -438,7 +440,7 @@ FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATI
10AE5..10AE6 ; T # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
10D24..10D27 ; T # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
10EAB..10EAC ; T # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
-10EFD..10EFF ; T # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
+10EFC..10EFF ; T # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA
10F46..10F50 ; T # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
10F82..10F85 ; T # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
11001 ; T # Mn BRAHMI SIGN ANUSVARA
@@ -568,6 +570,6 @@ E0001 ; T # Cf LANGUAGE TAG
E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG
E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
-# Total code points: 2150
+# Total code points: 2152
# EOF
diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt
index 73e0511e7e..ad6415932a 100644
--- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt
+++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt
@@ -1,5 +1,5 @@
# DerivedLineBreak-16.0.0.txt
-# Date: 2023-10-23, 10:06:43 GMT
+# Date: 2023-10-17, 12:29:04 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -68,8 +68,8 @@ E000..F8FF ; XX # Co [6400] ..
F0000..FFFFD ; XX # Co [65534] ..
100000..10FFFD; XX # Co [65534] ..
-# The above property value applies to 762721 code points not listed here.
-# Total code points: 900189
+# The above property value applies to 762600 code points not listed here.
+# Total code points: 900068
# ================================================
@@ -531,8 +531,10 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE
114D0..114D9 ; NU # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE
11650..11659 ; NU # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE
116C0..116C9 ; NU # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
+116D0..116E3 ; NU # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE
11730..11739 ; NU # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE
118E0..118E9 ; NU # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE
+11BF0..11BF9 ; NU # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE
11C50..11C59 ; NU # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE
11D50..11D59 ; NU # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE
11DA0..11DA9 ; NU # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE
@@ -546,7 +548,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE
1E950..1E959 ; NU # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
1FBF0..1FBF9 ; NU # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
-# Total code points: 624
+# Total code points: 654
# ================================================
@@ -734,7 +736,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE
0C2A..0C39 ; AL # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA
0C3D ; AL # Lo TELUGU SIGN AVAGRAHA
0C58..0C5A ; AL # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA
-0C5D ; AL # Lo TELUGU LETTER NAKAARA POLLU
+0C5C..0C5D ; AL # Lo [2] TELUGU ARCHAIC SHRII..TELUGU LETTER NAKAARA POLLU
0C60..0C61 ; AL # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL
0C78..0C7E ; AL # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR
0C7F ; AL # So TELUGU SIGN TUUMU
@@ -745,7 +747,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE
0CAA..0CB3 ; AL # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA
0CB5..0CB9 ; AL # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA
0CBD ; AL # Lo KANNADA SIGN AVAGRAHA
-0CDD..0CDE ; AL # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA
+0CDC..0CDE ; AL # Lo [3] KANNADA ARCHAIC SHRII..KANNADA LETTER FA
0CE0..0CE1 ; AL # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL
0CF1..0CF2 ; AL # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
0D04..0D0C ; AL # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
@@ -851,7 +853,7 @@ ABF0..ABF9 ; NU # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE
1C4D..1C4F ; AL # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA
1C5A..1C77 ; AL # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH
1C78..1C7D ; AL # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD
-1C80..1C88 ; AL # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK
+1C80..1C8A ; AL # L& [11] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER TJE
1C90..1CBA ; AL # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN
1CBD..1CBF ; AL # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
1CC0..1CC7 ; AL # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA
@@ -1238,6 +1240,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE
105A3..105B1 ; AL # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE
105B3..105B9 ; AL # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE
105BB..105BC ; AL # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE
+105C0..105F3 ; AL # Lo [52] TODHRI LETTER A..TODHRI LETTER OO
10600..10736 ; AL # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664
10740..10755 ; AL # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE
10760..10767 ; AL # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807
@@ -1299,6 +1302,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE
10E60..10E7E ; AL # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
10E80..10EA9 ; AL # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
10EB0..10EB1 ; AL # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
+10EC2..10EC4 ; AL # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
10F00..10F1C ; AL # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
10F1D..10F26 ; AL # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF
10F27 ; AL # Lo OLD SOGDIAN LIGATURE AYIN-DALETH
@@ -1373,6 +1377,8 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE
11A5C..11A89 ; AL # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA
11A9D ; AL # Lo SOYOMBO MARK PLUTA
11AB0..11AF8 ; AL # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL
+11BC0..11BE0 ; AL # Lo [33] SUNUWAR LETTER DEVI..SUNUWAR LETTER KLOKO
+11BE1 ; AL # Po SUNUWAR SIGN PVO
11C00..11C08 ; AL # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
11C0A..11C2E ; AL # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
11C40 ; AL # Lo BHAIKSUKI SIGN AVAGRAHA
@@ -1430,7 +1436,6 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE
1BC80..1BC88 ; AL # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL
1BC90..1BC99 ; AL # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW
1BC9C ; AL # So DUPLOYAN SIGN O WITH CROSS
-1CEB0..1CEB3 ; AL # So [4] HORIZONTAL ZIGZAG LINE..BLACK RIGHT TRIANGLE CARET
1CF50..1CFC3 ; AL # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK
1D000..1D0F5 ; AL # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO
1D100..1D126 ; AL # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2
@@ -1576,13 +1581,12 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE
1F850..1F859 ; AL # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
1F860..1F887 ; AL # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
1F890..1F8AD ; AL # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
-1F8B2 ; AL # So RIGHTWARDS ARROW WITH LOWER HOOK
1F900..1F90B ; AL # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT
1FA00..1FA53 ; AL # So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP
1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
1FB94..1FBCA ; AL # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON
-# Total code points: 21734
+# Total code points: 21822
# ================================================
@@ -1825,8 +1829,8 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR
30000..3134A ; ID # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; ID # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
-# The above property value applies to 61977 code points not listed here.
-# Total code points: 172567
+# The above property value applies to 61978 code points not listed here.
+# Total code points: 172568
# ================================================
@@ -1883,7 +1887,7 @@ FE19 ; IN # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS
0825..0827 ; CM # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U
0829..082D ; CM # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA
0859..085B ; CM # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK
-0898..089F ; CM # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA
+0897..089F ; CM # Mn [9] ARABIC PEPET..ARABIC HALF MADDA OVER MADDA
08CA..08E1 ; CM # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA
08E3..0902 ; CM # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA
0903 ; CM # Mc DEVANAGARI SIGN VISARGA
@@ -2148,7 +2152,7 @@ FFF9..FFFB ; CM # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTAT
10AE5..10AE6 ; CM # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
10D24..10D27 ; CM # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
10EAB..10EAC ; CM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
-10EFD..10EFF ; CM # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
+10EFC..10EFF ; CM # Mn [4] ARABIC COMBINING ALEF OVERLAY..ARABIC SMALL LOW WORD MADDA
10F46..10F50 ; CM # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
10F82..10F85 ; CM # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
11000 ; CM # Mc BRAHMI SIGN CANDRABINDU
@@ -2338,7 +2342,7 @@ E0001 ; CM # Cf LANGUAGE TAG
E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG
E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256
-# Total code points: 2429
+# Total code points: 2431
# ================================================
diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt
index ac11f73bb4..2f3f005741 100644
--- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt
+++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt
@@ -1,5 +1,5 @@
# DerivedName-16.0.0.txt
-# Date: 2023-10-02, 13:27:37 GMT
+# Date: 2023-10-17, 12:29:04 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -2098,6 +2098,7 @@
088E ; ARABIC VERTICAL TAIL
0890 ; ARABIC POUND MARK ABOVE
0891 ; ARABIC PIASTRE MARK ABOVE
+0897 ; ARABIC PEPET
0898 ; ARABIC SMALL HIGH WORD AL-JUZ
0899 ; ARABIC SMALL LOW WORD ISHMAAM
089A ; ARABIC SMALL LOW WORD IMAALA
@@ -2836,6 +2837,7 @@
0C58 ; TELUGU LETTER TSA
0C59 ; TELUGU LETTER DZA
0C5A ; TELUGU LETTER RRRA
+0C5C ; TELUGU ARCHAIC SHRII
0C5D ; TELUGU LETTER NAKAARA POLLU
0C60 ; TELUGU LETTER VOCALIC RR
0C61 ; TELUGU LETTER VOCALIC LL
@@ -2932,6 +2934,7 @@
0CCD ; KANNADA SIGN VIRAMA
0CD5 ; KANNADA LENGTH MARK
0CD6 ; KANNADA AI LENGTH MARK
+0CDC ; KANNADA ARCHAIC SHRII
0CDD ; KANNADA LETTER NAKAARA POLLU
0CDE ; KANNADA LETTER FA
0CE0 ; KANNADA LETTER VOCALIC RR
@@ -6486,6 +6489,8 @@
1C86 ; CYRILLIC SMALL LETTER TALL HARD SIGN
1C87 ; CYRILLIC SMALL LETTER TALL YAT
1C88 ; CYRILLIC SMALL LETTER UNBLENDED UK
+1C89 ; CYRILLIC CAPITAL LETTER TJE
+1C8A ; CYRILLIC SMALL LETTER TJE
1C90 ; GEORGIAN MTAVRULI CAPITAL LETTER AN
1C91 ; GEORGIAN MTAVRULI CAPITAL LETTER BAN
1C92 ; GEORGIAN MTAVRULI CAPITAL LETTER GAN
@@ -28666,6 +28671,58 @@ FFFD ; REPLACEMENT CHARACTER
105B9 ; VITHKUQI SMALL LETTER XE
105BB ; VITHKUQI SMALL LETTER Y
105BC ; VITHKUQI SMALL LETTER ZE
+105C0 ; TODHRI LETTER A
+105C1 ; TODHRI LETTER AS
+105C2 ; TODHRI LETTER BA
+105C3 ; TODHRI LETTER MBA
+105C4 ; TODHRI LETTER CA
+105C5 ; TODHRI LETTER CHA
+105C6 ; TODHRI LETTER DA
+105C7 ; TODHRI LETTER NDA
+105C8 ; TODHRI LETTER DHA
+105C9 ; TODHRI LETTER EI
+105CA ; TODHRI LETTER E
+105CB ; TODHRI LETTER FA
+105CC ; TODHRI LETTER GA
+105CD ; TODHRI LETTER NGA
+105CE ; TODHRI LETTER GJA
+105CF ; TODHRI LETTER NGJA
+105D0 ; TODHRI LETTER HA
+105D1 ; TODHRI LETTER HJA
+105D2 ; TODHRI LETTER I
+105D3 ; TODHRI LETTER JA
+105D4 ; TODHRI LETTER KA
+105D5 ; TODHRI LETTER LA
+105D6 ; TODHRI LETTER LLA
+105D7 ; TODHRI LETTER MA
+105D8 ; TODHRI LETTER NA
+105D9 ; TODHRI LETTER NJAN
+105DA ; TODHRI LETTER O
+105DB ; TODHRI LETTER PA
+105DC ; TODHRI LETTER QA
+105DD ; TODHRI LETTER RA
+105DE ; TODHRI LETTER RRA
+105DF ; TODHRI LETTER SA
+105E0 ; TODHRI LETTER SHA
+105E1 ; TODHRI LETTER SHTA
+105E2 ; TODHRI LETTER TA
+105E3 ; TODHRI LETTER THA
+105E4 ; TODHRI LETTER U
+105E5 ; TODHRI LETTER VA
+105E6 ; TODHRI LETTER XA
+105E7 ; TODHRI LETTER NXA
+105E8 ; TODHRI LETTER XHA
+105E9 ; TODHRI LETTER NXHA
+105EA ; TODHRI LETTER Y
+105EB ; TODHRI LETTER JY
+105EC ; TODHRI LETTER ZA
+105ED ; TODHRI LETTER ZHA
+105EE ; TODHRI LETTER GHA
+105EF ; TODHRI LETTER STA
+105F0 ; TODHRI LETTER SKAN
+105F1 ; TODHRI LETTER KHA
+105F2 ; TODHRI LETTER PSA
+105F3 ; TODHRI LETTER OO
10600 ; LINEAR A SIGN AB001
10601 ; LINEAR A SIGN AB002
10602 ; LINEAR A SIGN AB003
@@ -30065,6 +30122,10 @@ FFFD ; REPLACEMENT CHARACTER
10EAD ; YEZIDI HYPHENATION MARK
10EB0 ; YEZIDI LETTER LAM WITH DOT ABOVE
10EB1 ; YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
+10EC2 ; ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW
+10EC3 ; ARABIC LETTER TAH WITH TWO DOTS VERTICALLY BELOW
+10EC4 ; ARABIC LETTER KAF WITH TWO DOTS VERTICALLY BELOW
+10EFC ; ARABIC COMBINING ALEF OVERLAY
10EFD ; ARABIC SMALL LOW WORD SAKTA
10EFE ; ARABIC SMALL LOW WORD QASR
10EFF ; ARABIC SMALL LOW WORD MADDA
@@ -31360,6 +31421,26 @@ FFFD ; REPLACEMENT CHARACTER
116C7 ; TAKRI DIGIT SEVEN
116C8 ; TAKRI DIGIT EIGHT
116C9 ; TAKRI DIGIT NINE
+116D0 ; MYANMAR PAO DIGIT ZERO
+116D1 ; MYANMAR PAO DIGIT ONE
+116D2 ; MYANMAR PAO DIGIT TWO
+116D3 ; MYANMAR PAO DIGIT THREE
+116D4 ; MYANMAR PAO DIGIT FOUR
+116D5 ; MYANMAR PAO DIGIT FIVE
+116D6 ; MYANMAR PAO DIGIT SIX
+116D7 ; MYANMAR PAO DIGIT SEVEN
+116D8 ; MYANMAR PAO DIGIT EIGHT
+116D9 ; MYANMAR PAO DIGIT NINE
+116DA ; MYANMAR EASTERN PWO KAREN DIGIT ZERO
+116DB ; MYANMAR EASTERN PWO KAREN DIGIT ONE
+116DC ; MYANMAR EASTERN PWO KAREN DIGIT TWO
+116DD ; MYANMAR EASTERN PWO KAREN DIGIT THREE
+116DE ; MYANMAR EASTERN PWO KAREN DIGIT FOUR
+116DF ; MYANMAR EASTERN PWO KAREN DIGIT FIVE
+116E0 ; MYANMAR EASTERN PWO KAREN DIGIT SIX
+116E1 ; MYANMAR EASTERN PWO KAREN DIGIT SEVEN
+116E2 ; MYANMAR EASTERN PWO KAREN DIGIT EIGHT
+116E3 ; MYANMAR EASTERN PWO KAREN DIGIT NINE
11700 ; AHOM LETTER KA
11701 ; AHOM LETTER KHA
11702 ; AHOM LETTER NGA
@@ -31944,6 +32025,50 @@ FFFD ; REPLACEMENT CHARACTER
11B07 ; DEVANAGARI SIGN WESTERN NINE-LIKE BHALE
11B08 ; DEVANAGARI SIGN REVERSED NINE-LIKE BHALE
11B09 ; DEVANAGARI SIGN MINDU
+11BC0 ; SUNUWAR LETTER DEVI
+11BC1 ; SUNUWAR LETTER TASLA
+11BC2 ; SUNUWAR LETTER EKO
+11BC3 ; SUNUWAR LETTER IMAR
+11BC4 ; SUNUWAR LETTER REU
+11BC5 ; SUNUWAR LETTER UTTHI
+11BC6 ; SUNUWAR LETTER KIK
+11BC7 ; SUNUWAR LETTER MA
+11BC8 ; SUNUWAR LETTER APPHO
+11BC9 ; SUNUWAR LETTER PIP
+11BCA ; SUNUWAR LETTER GIL
+11BCB ; SUNUWAR LETTER HAMSO
+11BCC ; SUNUWAR LETTER CARMI
+11BCD ; SUNUWAR LETTER NAH
+11BCE ; SUNUWAR LETTER BUR
+11BCF ; SUNUWAR LETTER JYAH
+11BD0 ; SUNUWAR LETTER LOACHA
+11BD1 ; SUNUWAR LETTER OTTHI
+11BD2 ; SUNUWAR LETTER SHYELE
+11BD3 ; SUNUWAR LETTER VARCA
+11BD4 ; SUNUWAR LETTER YAT
+11BD5 ; SUNUWAR LETTER AVA
+11BD6 ; SUNUWAR LETTER AAL
+11BD7 ; SUNUWAR LETTER DONGA
+11BD8 ; SUNUWAR LETTER THARI
+11BD9 ; SUNUWAR LETTER PHAR
+11BDA ; SUNUWAR LETTER NGAR
+11BDB ; SUNUWAR LETTER KHA
+11BDC ; SUNUWAR LETTER SHYER
+11BDD ; SUNUWAR LETTER CHELAP
+11BDE ; SUNUWAR LETTER TENTU
+11BDF ; SUNUWAR LETTER THELE
+11BE0 ; SUNUWAR LETTER KLOKO
+11BE1 ; SUNUWAR SIGN PVO
+11BF0 ; SUNUWAR DIGIT ZERO
+11BF1 ; SUNUWAR DIGIT ONE
+11BF2 ; SUNUWAR DIGIT TWO
+11BF3 ; SUNUWAR DIGIT THREE
+11BF4 ; SUNUWAR DIGIT FOUR
+11BF5 ; SUNUWAR DIGIT FIVE
+11BF6 ; SUNUWAR DIGIT SIX
+11BF7 ; SUNUWAR DIGIT SEVEN
+11BF8 ; SUNUWAR DIGIT EIGHT
+11BF9 ; SUNUWAR DIGIT NINE
11C00 ; BHAIKSUKI LETTER A
11C01 ; BHAIKSUKI LETTER AA
11C02 ; BHAIKSUKI LETTER I
@@ -37779,10 +37904,6 @@ FFFD ; REPLACEMENT CHARACTER
1BCA1 ; SHORTHAND FORMAT CONTINUING OVERLAP
1BCA2 ; SHORTHAND FORMAT DOWN STEP
1BCA3 ; SHORTHAND FORMAT UP STEP
-1CEB0 ; HORIZONTAL ZIGZAG LINE
-1CEB1 ; KEYHOLE
-1CEB2 ; OLD PERSONAL COMPUTER WITH MONITOR IN PORTRAIT ORIENTATION
-1CEB3 ; BLACK RIGHT TRIANGLE CARET
1CF00 ; ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT
1CF01 ; ZNAMENNY COMBINING MARK NIZKO S KRYZHEM ON LEFT
1CF02 ; ZNAMENNY COMBINING MARK TSATA ON LEFT
@@ -43160,7 +43281,6 @@ FFFD ; REPLACEMENT CHARACTER
1F8AD ; WHITE ARROW SHAFT WIDTH TWO THIRDS
1F8B0 ; ARROW POINTING UPWARDS THEN NORTH WEST
1F8B1 ; ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST
-1F8B2 ; RIGHTWARDS ARROW WITH LOWER HOOK
1F900 ; CIRCLED CROSS FORMEE WITH FOUR DOTS
1F901 ; CIRCLED CROSS FORMEE WITH TWO DOTS
1F902 ; CIRCLED CROSS FORMEE
@@ -44181,6 +44301,6 @@ E01ED ; VARIATION SELECTOR-254
E01EE ; VARIATION SELECTOR-255
E01EF ; VARIATION SELECTOR-256
-# Total code points: 149818
+# Total code points: 149938
# EOF
diff --git a/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt b/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt
index 062f4fbe5f..edc53da784 100644
--- a/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt
+++ b/unicodetools/data/ucd/dev/extracted/DerivedNumericType.txt
@@ -1,5 +1,5 @@
-# DerivedNumericType-15.1.0.txt
-# Date: 2023-01-05, 20:34:41 GMT
+# DerivedNumericType-16.0.0.txt
+# Date: 2023-10-17, 12:29:07 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -269,9 +269,11 @@ FF10..FF19 ; Decimal # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
114D0..114D9 ; Decimal # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE
11650..11659 ; Decimal # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE
116C0..116C9 ; Decimal # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE
+116D0..116E3 ; Decimal # Nd [20] MYANMAR PAO DIGIT ZERO..MYANMAR EASTERN PWO KAREN DIGIT NINE
11730..11739 ; Decimal # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE
118E0..118E9 ; Decimal # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE
11950..11959 ; Decimal # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE
+11BF0..11BF9 ; Decimal # Nd [10] SUNUWAR DIGIT ZERO..SUNUWAR DIGIT NINE
11C50..11C59 ; Decimal # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE
11D50..11D59 ; Decimal # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE
11DA0..11DA9 ; Decimal # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE
@@ -286,6 +288,6 @@ FF10..FF19 ; Decimal # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
1E950..1E959 ; Decimal # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE
1FBF0..1FBF9 ; Decimal # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
-# Total code points: 680
+# Total code points: 710
# EOF
diff --git a/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt b/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt
index e671646825..88c117d563 100644
--- a/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt
+++ b/unicodetools/data/ucd/dev/extracted/DerivedNumericValues.txt
@@ -1,5 +1,5 @@
-# DerivedNumericValues-15.1.0.txt
-# Date: 2023-01-05, 20:34:41 GMT
+# DerivedNumericValues-16.0.0.txt
+# Date: 2023-10-17, 12:29:07 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -100,9 +100,12 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO
114D0 ; 0.0 ; ; 0 # Nd TIRHUTA DIGIT ZERO
11650 ; 0.0 ; ; 0 # Nd MODI DIGIT ZERO
116C0 ; 0.0 ; ; 0 # Nd TAKRI DIGIT ZERO
+116D0 ; 0.0 ; ; 0 # Nd MYANMAR PAO DIGIT ZERO
+116DA ; 0.0 ; ; 0 # Nd MYANMAR EASTERN PWO KAREN DIGIT ZERO
11730 ; 0.0 ; ; 0 # Nd AHOM DIGIT ZERO
118E0 ; 0.0 ; ; 0 # Nd WARANG CITI DIGIT ZERO
11950 ; 0.0 ; ; 0 # Nd DIVES AKURU DIGIT ZERO
+11BF0 ; 0.0 ; ; 0 # Nd SUNUWAR DIGIT ZERO
11C50 ; 0.0 ; ; 0 # Nd BHAIKSUKI DIGIT ZERO
11D50 ; 0.0 ; ; 0 # Nd MASARAM GONDI DIGIT ZERO
11DA0 ; 0.0 ; ; 0 # Nd GUNJALA GONDI DIGIT ZERO
@@ -126,7 +129,7 @@ FF10 ; 0.0 ; ; 0 # Nd FULLWIDTH DIGIT ZERO
1F10B..1F10C ; 0.0 ; ; 0 # No [2] DINGBAT CIRCLED SANS-SERIF DIGIT ZERO..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO
1FBF0 ; 0.0 ; ; 0 # Nd SEGMENTED DIGIT ZERO
-# Total code points: 88
+# Total code points: 91
# ================================================
@@ -511,9 +514,12 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE
114D1 ; 1.0 ; ; 1 # Nd TIRHUTA DIGIT ONE
11651 ; 1.0 ; ; 1 # Nd MODI DIGIT ONE
116C1 ; 1.0 ; ; 1 # Nd TAKRI DIGIT ONE
+116D1 ; 1.0 ; ; 1 # Nd MYANMAR PAO DIGIT ONE
+116DB ; 1.0 ; ; 1 # Nd MYANMAR EASTERN PWO KAREN DIGIT ONE
11731 ; 1.0 ; ; 1 # Nd AHOM DIGIT ONE
118E1 ; 1.0 ; ; 1 # Nd WARANG CITI DIGIT ONE
11951 ; 1.0 ; ; 1 # Nd DIVES AKURU DIGIT ONE
+11BF1 ; 1.0 ; ; 1 # Nd SUNUWAR DIGIT ONE
11C51 ; 1.0 ; ; 1 # Nd BHAIKSUKI DIGIT ONE
11C5A ; 1.0 ; ; 1 # No BHAIKSUKI NUMBER ONE
11D51 ; 1.0 ; ; 1 # Nd MASARAM GONDI DIGIT ONE
@@ -553,7 +559,7 @@ FF11 ; 1.0 ; ; 1 # Nd FULLWIDTH DIGIT ONE
1FBF1 ; 1.0 ; ; 1 # Nd SEGMENTED DIGIT ONE
2092A ; 1.0 ; ; 1 # Lo CJK UNIFIED IDEOGRAPH-2092A
-# Total code points: 144
+# Total code points: 147
# ================================================
@@ -661,9 +667,12 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO
114D2 ; 2.0 ; ; 2 # Nd TIRHUTA DIGIT TWO
11652 ; 2.0 ; ; 2 # Nd MODI DIGIT TWO
116C2 ; 2.0 ; ; 2 # Nd TAKRI DIGIT TWO
+116D2 ; 2.0 ; ; 2 # Nd MYANMAR PAO DIGIT TWO
+116DC ; 2.0 ; ; 2 # Nd MYANMAR EASTERN PWO KAREN DIGIT TWO
11732 ; 2.0 ; ; 2 # Nd AHOM DIGIT TWO
118E2 ; 2.0 ; ; 2 # Nd WARANG CITI DIGIT TWO
11952 ; 2.0 ; ; 2 # Nd DIVES AKURU DIGIT TWO
+11BF2 ; 2.0 ; ; 2 # Nd SUNUWAR DIGIT TWO
11C52 ; 2.0 ; ; 2 # Nd BHAIKSUKI DIGIT TWO
11C5B ; 2.0 ; ; 2 # No BHAIKSUKI NUMBER TWO
11D52 ; 2.0 ; ; 2 # Nd MASARAM GONDI DIGIT TWO
@@ -707,7 +716,7 @@ FF12 ; 2.0 ; ; 2 # Nd FULLWIDTH DIGIT TWO
1FBF2 ; 2.0 ; ; 2 # Nd SEGMENTED DIGIT TWO
22390 ; 2.0 ; ; 2 # Lo CJK UNIFIED IDEOGRAPH-22390
-# Total code points: 146
+# Total code points: 149
# ================================================
@@ -806,9 +815,12 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE
114D3 ; 3.0 ; ; 3 # Nd TIRHUTA DIGIT THREE
11653 ; 3.0 ; ; 3 # Nd MODI DIGIT THREE
116C3 ; 3.0 ; ; 3 # Nd TAKRI DIGIT THREE
+116D3 ; 3.0 ; ; 3 # Nd MYANMAR PAO DIGIT THREE
+116DD ; 3.0 ; ; 3 # Nd MYANMAR EASTERN PWO KAREN DIGIT THREE
11733 ; 3.0 ; ; 3 # Nd AHOM DIGIT THREE
118E3 ; 3.0 ; ; 3 # Nd WARANG CITI DIGIT THREE
11953 ; 3.0 ; ; 3 # Nd DIVES AKURU DIGIT THREE
+11BF3 ; 3.0 ; ; 3 # Nd SUNUWAR DIGIT THREE
11C53 ; 3.0 ; ; 3 # Nd BHAIKSUKI DIGIT THREE
11C5C ; 3.0 ; ; 3 # No BHAIKSUKI NUMBER THREE
11D53 ; 3.0 ; ; 3 # Nd MASARAM GONDI DIGIT THREE
@@ -855,7 +867,7 @@ FF13 ; 3.0 ; ; 3 # Nd FULLWIDTH DIGIT THREE
22998 ; 3.0 ; ; 3 # Lo CJK UNIFIED IDEOGRAPH-22998
23B1B ; 3.0 ; ; 3 # Lo CJK UNIFIED IDEOGRAPH-23B1B
-# Total code points: 144
+# Total code points: 147
# ================================================
@@ -948,9 +960,12 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR
114D4 ; 4.0 ; ; 4 # Nd TIRHUTA DIGIT FOUR
11654 ; 4.0 ; ; 4 # Nd MODI DIGIT FOUR
116C4 ; 4.0 ; ; 4 # Nd TAKRI DIGIT FOUR
+116D4 ; 4.0 ; ; 4 # Nd MYANMAR PAO DIGIT FOUR
+116DE ; 4.0 ; ; 4 # Nd MYANMAR EASTERN PWO KAREN DIGIT FOUR
11734 ; 4.0 ; ; 4 # Nd AHOM DIGIT FOUR
118E4 ; 4.0 ; ; 4 # Nd WARANG CITI DIGIT FOUR
11954 ; 4.0 ; ; 4 # Nd DIVES AKURU DIGIT FOUR
+11BF4 ; 4.0 ; ; 4 # Nd SUNUWAR DIGIT FOUR
11C54 ; 4.0 ; ; 4 # Nd BHAIKSUKI DIGIT FOUR
11C5D ; 4.0 ; ; 4 # No BHAIKSUKI NUMBER FOUR
11D54 ; 4.0 ; ; 4 # Nd MASARAM GONDI DIGIT FOUR
@@ -996,7 +1011,7 @@ FF14 ; 4.0 ; ; 4 # Nd FULLWIDTH DIGIT FOUR
200E2 ; 4.0 ; ; 4 # Lo CJK UNIFIED IDEOGRAPH-200E2
2626D ; 4.0 ; ; 4 # Lo CJK UNIFIED IDEOGRAPH-2626D
-# Total code points: 135
+# Total code points: 138
# ================================================
@@ -1093,9 +1108,12 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE
114D5 ; 5.0 ; ; 5 # Nd TIRHUTA DIGIT FIVE
11655 ; 5.0 ; ; 5 # Nd MODI DIGIT FIVE
116C5 ; 5.0 ; ; 5 # Nd TAKRI DIGIT FIVE
+116D5 ; 5.0 ; ; 5 # Nd MYANMAR PAO DIGIT FIVE
+116DF ; 5.0 ; ; 5 # Nd MYANMAR EASTERN PWO KAREN DIGIT FIVE
11735 ; 5.0 ; ; 5 # Nd AHOM DIGIT FIVE
118E5 ; 5.0 ; ; 5 # Nd WARANG CITI DIGIT FIVE
11955 ; 5.0 ; ; 5 # Nd DIVES AKURU DIGIT FIVE
+11BF5 ; 5.0 ; ; 5 # Nd SUNUWAR DIGIT FIVE
11C55 ; 5.0 ; ; 5 # Nd BHAIKSUKI DIGIT FIVE
11C5E ; 5.0 ; ; 5 # No BHAIKSUKI NUMBER FIVE
11D55 ; 5.0 ; ; 5 # Nd MASARAM GONDI DIGIT FIVE
@@ -1139,7 +1157,7 @@ FF15 ; 5.0 ; ; 5 # Nd FULLWIDTH DIGIT FIVE
1FBF5 ; 5.0 ; ; 5 # Nd SEGMENTED DIGIT FIVE
20121 ; 5.0 ; ; 5 # Lo CJK UNIFIED IDEOGRAPH-20121
-# Total code points: 133
+# Total code points: 136
# ================================================
@@ -1226,9 +1244,12 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX
114D6 ; 6.0 ; ; 6 # Nd TIRHUTA DIGIT SIX
11656 ; 6.0 ; ; 6 # Nd MODI DIGIT SIX
116C6 ; 6.0 ; ; 6 # Nd TAKRI DIGIT SIX
+116D6 ; 6.0 ; ; 6 # Nd MYANMAR PAO DIGIT SIX
+116E0 ; 6.0 ; ; 6 # Nd MYANMAR EASTERN PWO KAREN DIGIT SIX
11736 ; 6.0 ; ; 6 # Nd AHOM DIGIT SIX
118E6 ; 6.0 ; ; 6 # Nd WARANG CITI DIGIT SIX
11956 ; 6.0 ; ; 6 # Nd DIVES AKURU DIGIT SIX
+11BF6 ; 6.0 ; ; 6 # Nd SUNUWAR DIGIT SIX
11C56 ; 6.0 ; ; 6 # Nd BHAIKSUKI DIGIT SIX
11C5F ; 6.0 ; ; 6 # No BHAIKSUKI NUMBER SIX
11D56 ; 6.0 ; ; 6 # Nd MASARAM GONDI DIGIT SIX
@@ -1267,7 +1288,7 @@ FF16 ; 6.0 ; ; 6 # Nd FULLWIDTH DIGIT SIX
1FBF6 ; 6.0 ; ; 6 # Nd SEGMENTED DIGIT SIX
20AEA ; 6.0 ; ; 6 # Lo CJK UNIFIED IDEOGRAPH-20AEA
-# Total code points: 117
+# Total code points: 120
# ================================================
@@ -1353,9 +1374,12 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN
114D7 ; 7.0 ; ; 7 # Nd TIRHUTA DIGIT SEVEN
11657 ; 7.0 ; ; 7 # Nd MODI DIGIT SEVEN
116C7 ; 7.0 ; ; 7 # Nd TAKRI DIGIT SEVEN
+116D7 ; 7.0 ; ; 7 # Nd MYANMAR PAO DIGIT SEVEN
+116E1 ; 7.0 ; ; 7 # Nd MYANMAR EASTERN PWO KAREN DIGIT SEVEN
11737 ; 7.0 ; ; 7 # Nd AHOM DIGIT SEVEN
118E7 ; 7.0 ; ; 7 # Nd WARANG CITI DIGIT SEVEN
11957 ; 7.0 ; ; 7 # Nd DIVES AKURU DIGIT SEVEN
+11BF7 ; 7.0 ; ; 7 # Nd SUNUWAR DIGIT SEVEN
11C57 ; 7.0 ; ; 7 # Nd BHAIKSUKI DIGIT SEVEN
11C60 ; 7.0 ; ; 7 # No BHAIKSUKI NUMBER SEVEN
11D57 ; 7.0 ; ; 7 # Nd MASARAM GONDI DIGIT SEVEN
@@ -1393,7 +1417,7 @@ FF17 ; 7.0 ; ; 7 # Nd FULLWIDTH DIGIT SEVEN
1FBF7 ; 7.0 ; ; 7 # Nd SEGMENTED DIGIT SEVEN
20001 ; 7.0 ; ; 7 # Lo CJK UNIFIED IDEOGRAPH-20001
-# Total code points: 117
+# Total code points: 120
# ================================================
@@ -1476,9 +1500,12 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT
114D8 ; 8.0 ; ; 8 # Nd TIRHUTA DIGIT EIGHT
11658 ; 8.0 ; ; 8 # Nd MODI DIGIT EIGHT
116C8 ; 8.0 ; ; 8 # Nd TAKRI DIGIT EIGHT
+116D8 ; 8.0 ; ; 8 # Nd MYANMAR PAO DIGIT EIGHT
+116E2 ; 8.0 ; ; 8 # Nd MYANMAR EASTERN PWO KAREN DIGIT EIGHT
11738 ; 8.0 ; ; 8 # Nd AHOM DIGIT EIGHT
118E8 ; 8.0 ; ; 8 # Nd WARANG CITI DIGIT EIGHT
11958 ; 8.0 ; ; 8 # Nd DIVES AKURU DIGIT EIGHT
+11BF8 ; 8.0 ; ; 8 # Nd SUNUWAR DIGIT EIGHT
11C58 ; 8.0 ; ; 8 # Nd BHAIKSUKI DIGIT EIGHT
11C61 ; 8.0 ; ; 8 # No BHAIKSUKI NUMBER EIGHT
11D58 ; 8.0 ; ; 8 # Nd MASARAM GONDI DIGIT EIGHT
@@ -1515,7 +1542,7 @@ FF18 ; 8.0 ; ; 8 # Nd FULLWIDTH DIGIT EIGHT
1F109 ; 8.0 ; ; 8 # No DIGIT EIGHT COMMA
1FBF8 ; 8.0 ; ; 8 # Nd SEGMENTED DIGIT EIGHT
-# Total code points: 112
+# Total code points: 115
# ================================================
@@ -1601,9 +1628,12 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE
114D9 ; 9.0 ; ; 9 # Nd TIRHUTA DIGIT NINE
11659 ; 9.0 ; ; 9 # Nd MODI DIGIT NINE
116C9 ; 9.0 ; ; 9 # Nd TAKRI DIGIT NINE
+116D9 ; 9.0 ; ; 9 # Nd MYANMAR PAO DIGIT NINE
+116E3 ; 9.0 ; ; 9 # Nd MYANMAR EASTERN PWO KAREN DIGIT NINE
11739 ; 9.0 ; ; 9 # Nd AHOM DIGIT NINE
118E9 ; 9.0 ; ; 9 # Nd WARANG CITI DIGIT NINE
11959 ; 9.0 ; ; 9 # Nd DIVES AKURU DIGIT NINE
+11BF9 ; 9.0 ; ; 9 # Nd SUNUWAR DIGIT NINE
11C59 ; 9.0 ; ; 9 # Nd BHAIKSUKI DIGIT NINE
11C62 ; 9.0 ; ; 9 # No BHAIKSUKI NUMBER NINE
11D59 ; 9.0 ; ; 9 # Nd MASARAM GONDI DIGIT NINE
@@ -1641,7 +1671,7 @@ FF19 ; 9.0 ; ; 9 # Nd FULLWIDTH DIGIT NINE
1FBF9 ; 9.0 ; ; 9 # Nd SEGMENTED DIGIT NINE
2F890 ; 9.0 ; ; 9 # Lo CJK COMPATIBILITY IDEOGRAPH-2F890
-# Total code points: 118
+# Total code points: 121
# ================================================
diff --git a/unicodetools/src/main/java/org/unicode/props/UcdLineParser.java b/unicodetools/src/main/java/org/unicode/props/UcdLineParser.java
index 712d5e0c0c..c17f3c326f 100644
--- a/unicodetools/src/main/java/org/unicode/props/UcdLineParser.java
+++ b/unicodetools/src/main/java/org/unicode/props/UcdLineParser.java
@@ -111,6 +111,11 @@ public boolean hasNext() {
return false;
}
line = line2 = rawLines.next();
+ if (line.startsWith("<<<<<<<")
+ || line.startsWith("=======")
+ || line.startsWith(">>>>>>>")) {
+ line2 = "";
+ }
++stats.lineCount;
final int hashPos = line2.indexOf('#');
if (hashPos >= 0) {
diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java
index 732ec52725..dcf634227a 100644
--- a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java
+++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java
@@ -393,6 +393,7 @@ public enum Block_Values implements Named {
Myanmar("Myanmar"),
Myanmar_Extended_A("Myanmar_Ext_A"),
Myanmar_Extended_B("Myanmar_Ext_B"),
+ Myanmar_Extended_C("Myanmar_Ext_C"),
Nabataean("Nabataean"),
Nag_Mundari("Nag_Mundari"),
Nandinagari("Nandinagari"),
@@ -451,6 +452,7 @@ public enum Block_Values implements Named {
Specials("Specials"),
Sundanese("Sundanese"),
Sundanese_Supplement("Sundanese_Sup"),
+ Sunuwar("Sunuwar"),
Supplemental_Arrows_A("Sup_Arrows_A"),
Supplemental_Arrows_B("Sup_Arrows_B"),
Supplemental_Arrows_C("Sup_Arrows_C"),
@@ -487,6 +489,7 @@ public enum Block_Values implements Named {
Tibetan("Tibetan"),
Tifinagh("Tifinagh"),
Tirhuta("Tirhuta"),
+ Todhri("Todhri"),
Toto("Toto"),
Transport_And_Map_Symbols("Transport_And_Map"),
Unified_Canadian_Aboriginal_Syllabics("UCAS", "Canadian_Syllabics"),
@@ -1834,6 +1837,7 @@ public enum Script_Values implements Named {
Sora_Sompeng("Sora"),
Soyombo("Soyo"),
Sundanese("Sund"),
+ Sunuwar("Sunu"),
Syloti_Nagri("Sylo"),
Syriac("Syrc"),
Tagbanwa("Tagb"),
@@ -1851,6 +1855,7 @@ public enum Script_Values implements Named {
Tibetan("Tibt"),
Tirhuta("Tirh"),
Tangsa("Tnsa"),
+ Todhri("Todr"),
Toto("Toto"),
Ugaritic("Ugar"),
Vai("Vaii"),
diff --git a/unicodetools/src/main/java/org/unicode/text/UCA/ReorderCodes.java b/unicodetools/src/main/java/org/unicode/text/UCA/ReorderCodes.java
index b46b53c81e..2d733a0544 100644
--- a/unicodetools/src/main/java/org/unicode/text/UCA/ReorderCodes.java
+++ b/unicodetools/src/main/java/org/unicode/text/UCA/ReorderCodes.java
@@ -139,9 +139,37 @@ public static final String getSampleCharacter(int reorderCode) {
// TODO:
// - Remove scripts supported by ICU4J UScript and CLDR ScriptMetadata.
// - Add scripts not yet supported there.
+ //
+ // See https://www.unicode.org/alloc/Pipeline.html
+ // and https://cldr.unicode.org/development/updating-codes/updating-script-metadata
switch (reorderCode) {
// case UCD_Types.Old_Hungarian:
// return "𐲡";
+ // Approved for Unicode 16:
+ case UCD_Types.Garay:
+ return "\uD803\uDD5D";
+ case UCD_Types.Gurung_Khema:
+ return "\uD818\uDD1C";
+ case UCD_Types.Kirat_Rai:
+ return "\uD81B\uDD45";
+ case UCD_Types.Ol_Onal:
+ return "\uD839\uDDD0";
+ case UCD_Types.Sunuwar:
+ return "\uD806\uDFC4";
+ case UCD_Types.Todhri:
+ return "\uD801\uDDC2";
+ case UCD_Types.Tulu_Tigalari:
+ return "\uD804\uDF92";
+
+ // Provisionally assigned so far:
+ case UCD_Types.Chisoi:
+ return "\uD81B\uDD93";
+ case UCD_Types.Sidetic:
+ return "\uD802\uDD50";
+ case UCD_Types.Tai_Yo:
+ return "\uD839\uDED5";
+ case UCD_Types.Tolong_Siki:
+ return "\uD807\uDDC6";
default:
throw new UnsupportedOperationException("unknown reorderCode " + reorderCode);
}
diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java
index e1ff508adc..759361106e 100644
--- a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java
+++ b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java
@@ -67,6 +67,7 @@ static class Format {
Map> fileToPropertySet = new TreeMap>();
Map fileToComments = new TreeMap();
Map fileToDirectory = new TreeMap();
+ Map> propertyToOrderedValues = new TreeMap>();
Map> propertyToValueToComments =
new TreeMap>();
Map hackMap = new HashMap();
@@ -110,6 +111,12 @@ public static class PrintStyle {
// Unicode 15.1 and later LineBreak.txt and EastAsianWidth.txt, which are all generated
// in that format by some other tool.
boolean kenFile = false;
+ // Whether the file should be produced in the style of IndicPositionalCategory.txt and
+ // IndicSyllabicCategory.txt, which are both generated in that format by some other
+ // tool.
+ boolean roozbehFile = false;
+ // Whether to separate values of enumerated properties using a line of equal signs.
+ boolean separateValues = true;
boolean hackValues = false;
boolean mergeRanges = true;
String nameStyle = "none";
@@ -138,6 +145,10 @@ String parse(String options) {
interleaveValues = true;
} else if (piece.equals("kenFile")) {
kenFile = true;
+ } else if (piece.equals("roozbehFile")) {
+ roozbehFile = true;
+ } else if (piece.startsWith("separateValues=")) {
+ separateValues = afterEqualsBoolean(piece);
} else if (piece.equals("hackValues")) {
hackValues = true;
} else if (piece.equals("sortNumeric")) {
@@ -301,6 +312,10 @@ private void build() {
}
line = line.trim();
if (line.length() == 0) {
+ if (comments.length() != 0) {
+ // Preserve blank lines between comments.
+ comments += "\n";
+ }
continue;
}
if (DEBUG) {
@@ -321,6 +336,7 @@ private void build() {
comments += line;
} else {
// end of comments, roll up
+ comments = comments.trim();
if (comments.length() != 0) {
if (property != null) {
addValueComments(property, value, comments);
@@ -350,6 +366,10 @@ private void build() {
value = "";
} else if (line.startsWith("Value:")) {
value = lineValue;
+ final var values =
+ propertyToOrderedValues.computeIfAbsent(
+ property, k -> new ArrayList());
+ values.add(value);
} else if (line.startsWith("HackName:")) {
final String regularItem = Utility.getUnskeleton(lineValue, true);
hackMap.put(regularItem, lineValue);
@@ -1152,6 +1172,9 @@ public static void generatePropertyFile(String filename) throws IOException {
filename, Format.theFormat.getPrintStyle(name));
if (!ps.kenFile) {
pwProp.println();
+ if (!ps.separateValues) {
+ pwProp.println();
+ }
pwProp.println(SEPARATOR);
}
final String propComment = Format.theFormat.getValueComments(name, "");
@@ -1161,7 +1184,11 @@ public static void generatePropertyFile(String filename) throws IOException {
pwProp.println(propComment);
} else if (!prop.isType(UnicodeProperty.BINARY_MASK)) {
pwProp.println();
- pwProp.println("# Property:\t" + name);
+ if (ps.roozbehFile) {
+ pwProp.println("# Property: " + name);
+ } else {
+ pwProp.println("# Property:\t" + name);
+ }
}
}
@@ -1182,9 +1209,12 @@ public static void generatePropertyFile(String filename) throws IOException {
v = v + " (" + v2 + ")";
}
}
- pwProp.println();
+ pwProp.println(ps.roozbehFile ? "#" : "");
pwProp.println("# All code points not explicitly listed for " + prop.getName());
- pwProp.println("# have the value " + v + ".");
+ pwProp.println(
+ "# have the value "
+ + v
+ + (ps.roozbehFile && v.equals("NA") ? " (not applicable)." : "."));
}
if (!ps.interleaveValues && prop.isType(UnicodeProperty.BINARY_MASK)) {
@@ -1254,6 +1284,21 @@ private static void writeEnumeratedValues(
temp2.addAll(aliases);
aliases = temp2;
}
+ if (ps.roozbehFile) {
+ aliases.removeIf(alias -> UnicodeProperty.compareNames(alias, ps.skipValue) == 0);
+ if (!Format.theFormat
+ .propertyToOrderedValues
+ .get(prop.getName())
+ .containsAll(aliases)) {
+ final TreeSet missingAliases = new TreeSet(aliases);
+ missingAliases.removeAll(
+ Format.theFormat.propertyToOrderedValues.get(prop.getName()));
+ throw new IllegalArgumentException(
+ "All values must be listed when using roozbehFile; missing "
+ + missingAliases);
+ }
+ aliases = Format.theFormat.propertyToOrderedValues.get(prop.getName());
+ }
if (ps.sortNumeric) {
if (DEBUG) {
System.out.println("Reordering");
@@ -1284,7 +1329,7 @@ private static void writeEnumeratedValues(
final String missing = ps.skipUnassigned != null ? ps.skipUnassigned : ps.skipValue;
if (missing != null && !missing.equals(UCD_Names.NO)) {
- pw.println();
+ pw.println(ps.roozbehFile ? "#" : "");
final String propName = bf.getPropName();
// if (propName == null) propName = "";
// else if (propName.length() != 0) propName = propName + "; ";
@@ -1302,6 +1347,10 @@ private static void writeEnumeratedValues(
writeEnumeratedMissingValues(pw, overallDefault, defaultLbValues);
}
}
+ if (!ps.separateValues) {
+ pw.println();
+ pw.println(SEPARATOR.replace('=', '-'));
+ }
for (final Iterator it = aliases.iterator(); it.hasNext(); ) {
final String value = it.next();
if (DEBUG) {
@@ -1416,9 +1465,13 @@ private static void writeEnumeratedValues(
if (!prop.isType(UnicodeProperty.BINARY_MASK)) {
pw.println();
- pw.println(SEPARATOR);
+ if (ps.separateValues) {
+ pw.println(SEPARATOR);
+ }
if (nonLongValue) {
- pw.println();
+ if (ps.separateValues) {
+ pw.println();
+ }
pw.println("# " + prop.getName() + "=" + value);
}
}
@@ -1442,6 +1495,11 @@ private static void writeEnumeratedValues(
pw.println();
// if (s.size() != 0)
bf.setMergeRanges(ps.mergeRanges);
+ bf.setShowTotal(!ps.roozbehFile);
+ if (ps.roozbehFile) {
+ bf.setRangeBreakSource(
+ ToolUnicodePropertySource.make(Default.ucdVersion()).getProperty("Block"));
+ }
bf.showSetNames(pw, s);
if (DEBUG) {
System.out.println(bf.showSetNames(s));
diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java
index af97cfdacf..dbbea74f6e 100644
--- a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java
+++ b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java
@@ -503,7 +503,7 @@ private static PropertyComparison getPropertyComparison(ParsePosition pp, String
propertyComparison.valueSet = new UnicodeSet(line, pp, symbolTable);
propertyComparison.property1 = CompoundProperty.of(LATEST_PROPS, line, pp);
final int cp = line.codePointAt(pp.getIndex());
- if (cp != '=' && cp != 'x') {
+ if (cp != '=' && cp != '≠') {
throw new ParseException(line, pp.getIndex());
}
propertyComparison.shouldBeEqual = cp == '=';
diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java
index c48269675d..0f12b0ffc5 100644
--- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java
+++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Names.java
@@ -414,19 +414,19 @@ public final class UCD_Names implements UCD_Types {
// Unicode 15
"Kawi",
"Nag_Mundari",
- // A future version of Unicode
- "Sunuwar",
- "Tulu_Tigalari",
- "Kirat_Rai",
- "Todhri",
+ // Unicode 16
"Garay",
"Gurung_Khema",
+ "Kirat_Rai",
"Ol_Onal",
+ "Sunuwar",
+ "Todhri",
+ "Tulu_Tigalari",
// Provisionally assigned
- "Sidetic",
"Chisoi",
- "Tolong_Siki",
+ "Sidetic",
"Tai_Yo",
+ "Tolong_Siki",
};
public static final Relation EXTRA_SCRIPT =
@@ -611,19 +611,19 @@ public final class UCD_Names implements UCD_Types {
// Unicode 15
"Kawi",
"Nagm",
- // A future version of Unicode
- "Qaba",
- "Qabb",
- "Qabc",
- "Qabd",
- "Qabe",
- "Qabf",
- "Qabg",
+ // Unicode 16
+ "Gara",
+ "Gukh",
+ "Krai",
+ "Onao",
+ "Sunu",
+ "Todr",
+ "Tutg",
// Provisionally assigned
- "Qabh",
- "Qabi",
- "Qabj",
- "Qabk",
+ "Chis",
+ "Sidt",
+ "Tayo",
+ "Tols",
};
static final String[] SHORT_AGE = {
diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java
index 6f5a763400..972753c371 100644
--- a/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java
+++ b/unicodetools/src/main/java/org/unicode/text/UCD/UCD_Types.java
@@ -599,20 +599,20 @@ public interface UCD_Types {
// Unicode 15
Kawi = 164,
Nag_Mundari = 165,
- // A future version of Unicode
- Sunuwar = 166,
- Tulu_Tigalari = 167,
+ // Unicode 16
+ Garay = 166,
+ Gurung_Khema = 167,
Kirat_Rai = 168,
- Todhri = 169,
- Garay = 170,
- Gurung_Khema = 171,
- Ol_Onal = 172,
+ Ol_Onal = 169,
+ Sunuwar = 170,
+ Todhri = 171,
+ Tulu_Tigalari = 172,
// Provisionally assigned
- Sidetic = 173,
- Chisoi = 174,
- Tolong_Siki = 175,
- Tai_Yo = 176,
- LIMIT_SCRIPT = Tai_Yo + 1;
+ Chisoi = 173,
+ Sidetic = 174,
+ Tai_Yo = 175,
+ Tolong_Siki = 176,
+ LIMIT_SCRIPT = Tolong_Siki + 1;
// Bidi_Paired_Bracket_Type
public static final byte BPT_N = 0, BPT_O = 1, BPT_C = 2, LIMIT_BPT = 3;
diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt
index 702c46ca54..db8ebd7b8f 100644
--- a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt
+++ b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt
@@ -908,6 +908,376 @@ Format: kenFile skipValue=Rotated
#
Property: VerticalOrientation
+File: IndicPositionalCategory
+#
+# This file defines the following property:
+#
+# Indic_Positional_Category enumerated property
+#
+# Scope: This property is aimed at the problem of
+# the specification of syllabic structure for Indic scripts.
+# Because dependent vowels (matras), visible viramas, and other
+# characters are placed in notional slots around the consonant (or
+# consonant cluster) core of an Indic syllable, there may be
+# cooccurrence constraints or other interactions. Also, it may be
+# desirable, in cases where more than one such character may occur in
+# sequence, as for example, in a top slot and a bottom slot, to
+# specify preferred orders for spelling. As such, this property
+# is designed primarily to supplement the Indic_Syllabic_Category
+# property.
+#
+# In addition to combining marks associated with Indic scripts, the
+# Indic_Positional_Category has non-trivial values for special signs
+# associated with Indic_Syllabic_Category=Consonant_Prefixed
+# or Indic_Syllabic_Category=Consonant_Preceding_Repha. Those signs
+# have General_Category=Lo, rather than being combining marks.
+# They occur in initial position in syllabic structure. However, when
+# rendered, they appear as marks positioned with respect to another
+# base letter (usually above it). Hence, having an explicit value for
+# Indic_Positional_Category for those signs can be helpful.
+#
+# Note that this property is *not* intended as
+# a prescriptive property regarding display or font design,
+# for a number of reasons. Good font design requires information
+# that is outside the context of a character encoding standard,
+# and is best handled in other venues. For Indic dependent
+# vowels and similar characters, in particular:
+#
+# 1. Matra placement may vary somewhat based on typeface design.
+# 2. Matra placement, even within a single script, may vary
+# somewhat according to historic period or local conventions.
+# 3. Matra placement may be changed by explicit orthographic reform
+# decisions.
+# 4. Matras may ligate in various ways with a consonant (or even
+# other elements of a syllable) instead of occurring in a
+# discrete location.
+# 5. Matra display may be contextually determined. This is
+# notable, for example, in the Tamil script, where the shape
+# and placement of -u and -uu vowels depends strongly on
+# which consonant they adjoin.
+#
+# Format:
+# Field 0 Unicode code point value or range of code point values
+# Field 1 Indic_Positional_Category property value
+#
+# Field 1 is followed by a comment field, starting with the number sign '#',
+# which shows the General_Category property value, the Unicode character name
+# or names, and, in lines with ranges of code points, the code point count in
+# square brackets.
+#
+# The scripts assessed as containing dependent vowels or similar characters
+# in the structural sense used for the Indic_Positional_Category are the
+# following:
+#
+# Ahom, Balinese, Batak, Bengali, Bhaiksuki, Brahmi, Buginese, Buhid,
+# Chakma, Cham, Devanagari, Dives Akuru, Dogra, Grantha, Gujarati,
+# Gunjala Gondi, Gurmukhi, Hanunoo, Javanese, Kaithi, Kannada, Kawi,
+# Kayah Li, Kharoshthi, Khmer, Khojki, Khudawadi, Lao, Lepcha, Limbu,
+# Makasar, Malayalam, Marchen, Masaram Gondi, Meetei Mayek, Modi,
+# Myanmar, Nandinagari, Newa, New Tai Lue, Oriya, Rejang, Saurashtra,
+# Sharada, Siddham, Sinhala, Soyombo, Sundanese, Syloti Nagri,
+# Tagalog, Tagbanwa, Tai Tham, Tai Viet, Takri, Tamil, Telugu, Thai,
+# Tibetan, Tirhuta, and Zanabazar Square.
+#
+# All characters for all other scripts not in that list
+# take the default value for this property.
+#
+# See IndicSyllabicCategory.txt for a slightly more extended
+# list of Indic scripts, including those which do not have
+# positional characters. Currently, those additional
+# Indic scripts without positional characters are
+# Multani, Phags-pa, and Tai Le.
+#
+# Notes:
+#
+# 1. The following characters are all assigned the positional category Right,
+# but may have different positions in some cases:
+# * U+0BC1 TAMIL VOWEL SIGN U and U+0BC2 TAMIL VOWEL SIGN UU have
+# contextually variable placement in Tamil.
+# * U+0D41 MALAYALAM VOWEL SIGN U and U+0D42 MALAYALAM VOWEL SIGN UU form
+# complex ligatures with consonants in older Malayalam orthography.
+# * U+11341 GRANTHA VOWEL SIGN U and U+11342 GRANTHA VOWEL SIGN UU have
+# contextually variable placement in Grantha.
+# * U+11440 NEWA VOWEL SIGN O and U+11441 NEWA VOWEL SIGN AU have contextually
+# variable placement in Newa.
+#
+# 2. The following characters are all assigned the positional category Top,
+# but may have different positions in some cases:
+# * U+1143E NEWA VOWEL SIGN E and U+1143F NEWA VOWEL SIGN AI have contextually
+# variable placement in Newa.
+#
+# 3. The following characters are all assigned the positional category Bottom,
+# but may have different positions in some cases:
+# * U+102F MYANMAR VOWEL SIGN U and U+1030 MYANMAR VOWEL SIGN UU have
+# contextually variable placement in Myanmar.
+# * U+1A69 TAI THAM VOWEL SIGN U and U+1A6A TAI THAM VOWEL SIGN UU have
+# contextually variable placement in Tai Tham.
+#
+# 4. The following character is assigned the positional category Left, but
+# may have different positions in different styles:
+# * U+119D2 NANDINAGARI VOWEL SIGN I has stylistically variable placement
+# in Nandinagari.
+Property: Indic_Positional_Category
+Format: roozbehFile separateValues=false valueStyle=short skipValue=NA
+Value: Right
+Value: Left
+Value: Visual_Order_Left
+
+# These are dependent vowels that occur to the left of the consonant
+# letter in a syllable, but which occur in scripts using the visual order
+# model, instead of the logical order model. Because of the different
+# model, these left-side vowels occur first in the backing store (before
+# the consonant letter) and are not reordered during text rendering.
+#
+# [Derivation: Logical_Order_Exception=Yes]
+Value: Left_And_Right
+Value: Top
+Value: Bottom
+Value: Top_And_Bottom
+Value: Top_And_Right
+Value: Top_And_Left
+Value: Top_And_Left_And_Right
+Value: Bottom_And_Right
+Value: Bottom_And_Left
+Value: Top_And_Bottom_And_Right
+Value: Top_And_Bottom_And_Left
+Value: Overstruck
+
+File: IndicSyllabicCategory
+#
+# This file defines the following property:
+#
+# Indic_Syllabic_Category enumerated property
+#
+# Scope: This property is aimed at two general problem
+# areas involving the analysis and processing of Indic scripts:
+#
+# 1. Specification of syllabic structure.
+# 2. Specification of segmentation rules.
+#
+# Both of these problem areas may benefit from having defined subtypes
+# of Indic script characters which are relevant to how Indic
+# syllables (or aksaras) are constructed. Note that rules for
+# syllabic structure in Indic scripts may differ significantly
+# from how phonological syllables are defined.
+#
+# Format:
+# Field 0 Unicode code point value or range of code point values
+# Field 1 Indic_Syllabic_Category property value
+#
+# Field 1 is followed by a comment field, starting with the number sign '#',
+# which shows the General_Category property value, the Unicode character name
+# or names, and, in lines with ranges of code points, the code point count in
+# square brackets.
+#
+# The scripts assessed as Indic in the structural sense used for the
+# Indic_Syllabic_Category are the following:
+#
+# Ahom, Balinese, Batak, Bengali, Bhaiksuki, Brahmi, Buginese, Buhid,
+# Chakma, Cham, Devanagari, Dives Akuru, Dogra, Grantha, Gujarati,
+# Gunjala Gondi, Gurmukhi, Hanunoo, Javanese, Kaithi, Kannada, Kawi,
+# Kayah Li, Kharoshthi, Khmer, Khojki, Khudawadi, Lao, Lepcha, Limbu,
+# Mahajani, Makasar, Malayalam, Marchen, Masaram Gondi, Meetei Mayek,
+# Modi, Multani, Myanmar, Nandinagari, Newa, New Tai Lue, Oriya,
+# Phags-pa, Rejang, Saurashtra, Sharada, Siddham, Sinhala, Soyombo,
+# Sundanese, Syloti Nagri, Tagalog, Tagbanwa, Tai Le, Tai Tham,
+# Tai Viet, Takri, Tamil, Telugu, Thai, Tibetan, Tirhuta, and
+# Zanabazar Square.
+#
+# All characters for all other scripts not in that list
+# take the default value for this property, unless they
+# are individually listed in this data file.
+#
+Property: Indic_Syllabic_Category
+Format: roozbehFile valueStyle=short skipValue=Other
+Value: Bindu
+# Bindu/Anusvara (nasalization or -n)
+
+# [Not derivable]
+Value: Visarga
+# Visarga (-h)
+# Excludes letters for jihvamuliya and upadhmaniya, which are
+# related, but structured somewhat differently.
+
+# [Not derivable]
+Value: Avagraha
+# Avagraha (elision of initial a- in sandhi)
+
+# [Not derivable]
+Value: Nukta
+# Nukta (diacritic for borrowed consonants or other consonant
+# modifications). Note that while the resulting sound is typically a
+# consonant, the base letter a nukta follows may be an independent
+# vowel. For example, <U+0A85, U+0ABC> is used to transcribe ARABIC LETTER
+# AIN.
+
+# [Not derivable]
+Value: Virama
+# Virama (killing of inherent vowel in consonant sequence
+# or consonant stacker)
+# Only includes characters that can act both as visible killer viramas
+# and consonant stackers. Separate property values exist for characters
+# that can only act as pure killers or only as consonant stackers.
+
+# [Derivation: (ccc=9) - (InSC=Pure_Killer) - (InSC=Invisible_Stacker)
+# - (InSC=Number_Joiner) - 2D7F]
+Value: Pure_Killer
+# Pure killer (killing of inherent vowel in consonant sequence,
+# with no consonant stacking behavior)
+
+# [Not derivable]
+Value: Invisible_Stacker
+# Invisible stacker (invisible consonant stacker virama).
+#
+# Note that in some scripts, such as Kharoshthi and Masaram Gondi, an invisible
+# stacker may have a second function, changing the shape and/or location of the
+# consonant preceding it, even when there is no consonant following the
+# invisible stacker.
+
+# [Not derivable]
+Value: Vowel_Independent
+# Independent Vowels (contrasted with matras)
+
+# [Not derivable]
+Value: Vowel_Dependent
+# Dependent Vowels (contrasted with independent vowels and/or with
+# complex placement). Known as matras in Indic scripts. Also
+# includes vowel modifiers that follow dependent (and sometimes
+# independent) vowels.
+
+# [Not derivable]
+Value: Vowel
+# (Other) Vowels (reanalyzed as ordinary alphabetic letters or marks)
+
+# [Not derivable]
+Value: Consonant_Placeholder
+# Consonant Placeholder
+# This includes generic placeholders used for
+# Indic script layout (NBSP and dotted circle), as well as a few script-
+# specific vowel-holder characters which are not technically
+# consonants, but serve instead as bases for placement of vowel marks.
+
+# [Not derivable]
+Value: Consonant
+# Consonant (ordinary abugida consonants, with inherent vowels)
+
+# [Not derivable]
+Value: Consonant_Dead
+# Dead Consonant (special consonant with killed vowel)
+
+# [Not derivable]
+Value: Consonant_With_Stacker
+# Consonants that may make stacked ligatures with the next consonant
+# without the use of a virama
+
+# [Not derivable]
+Value: Consonant_Prefixed
+# Cluster-initial consonants
+
+# [Not derivable]
+Value: Consonant_Preceding_Repha
+# Repha Form of RA (reanalyzed in some scripts), when preceding the main
+# consonant.
+
+# [Not derivable]
+Value: Consonant_Initial_Postfixed
+# Consonants that succeed the main consonant in character sequences, but are
+# pronounced before it.
+
+# [Not derivable]
+Value: Consonant_Succeeding_Repha
+# Repha Form of RA (reanalyzed in some scripts), when succeeding the main
+# consonant.
+
+# [Not derivable]
+Value: Consonant_Subjoined
+# Subjoined Consonant (C2 form subtending a base consonant in Tibetan, etc.)
+
+# [Not derivable]
+Value: Consonant_Medial
+# Medial Consonant (medial liquid, occurring in clusters)
+
+# [Not derivable]
+Value: Consonant_Final
+# Final Consonant (special final forms which do not take vowels)
+
+# [Not derivable]
+Value: Consonant_Head_Letter
+# Head Letter (Tibetan)
+
+# [Not derivable]
+Value: Modifying_Letter
+# Reanalyzed letters not participating in the abugida structure, but
+# serving to modify the sound of an adjacent vowel or consonant.
+# Note that this is not the same as General_Category=Modifier_Letter.
+
+# [Not derivable]
+Value: Tone_Letter
+# Tone Letter (spacing lexical tone mark with status as a letter)
+
+# [Not derivable]
+Value: Tone_Mark
+# Tone Mark (nonspacing or spacing lexical tone mark)
+
+# [Not derivable]
+Value: Gemination_Mark
+# Gemination Mark (doubling of the preceding or following consonant)
+#
+# U+0A71 GURMUKHI ADDAK precedes the consonant it geminates, while the
+# others follow the consonant they geminate.
+
+# [Not derivable]
+Value: Cantillation_Mark
+# Cantillation Mark (recitation marks, such as svara markers for the Samaveda)
+
+# [Not derivable]
+Value: Register_Shifter
+# Register Shifter (shifts register for consonants, akin to a tone mark)
+
+# [Not derivable]
+Value: Syllable_Modifier
+# Syllable Modifier (miscellaneous combining characters that modify
+# something in the orthographic syllable they succeed or appear in)
+
+# [Not derivable]
+Value: Consonant_Killer
+# Consonant Killer (signifies that the previous consonant or consonants are
+# not pronounced)
+
+# [Not derivable]
+Value: Non_Joiner
+# Non_Joiner (Zero Width Non-Joiner)
+
+# [Not derivable]
+Value: Joiner
+# Joiner (Zero Width Joiner)
+
+# [Not derivable]
+Value: Number_Joiner
+# Number_Joiner (forms ligatures between numbers for multiplication)
+
+# [Not derivable]
+Value: Number
+# Number (can be used as vowel-holders like consonant placeholders)
+# Note: A number may even hold subjoined consonants which may in turn
+# have been formed using a virama or a stacker, e.g. the sequence
+# <U+1A93, U+1A60, U+1A34> where THAI THAM LETTER LOW TA is subjoined to
+# TAI THAM THAM DIGIT THREE using an invisible stacker.
+
+# [Not derivable]
+Value: Brahmi_Joining_Number
+# Brahmi Joining Number (may be joined by a Number_Joiner of the same
+# script, e.g. in Brahmi)
+#
+# Note: These are different from Numbers, in the way that there is no known
+# evidence of Brahmi Joining Numbers taking vowels or subjoined consonants.
+# Until such evidence is found, implementations may assume that Brahmi
+# Joining Numbers only participate in shaping with other Brahmi Joining
+# Numbers.
+
+# [Not derivable]
+
File: UnicodeData
Property: SPECIAL
diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt
index d1cb10b130..fbe76d4102 100644
--- a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt
+++ b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt
@@ -214,6 +214,7 @@ Music ; Musical_Symbols
Myanmar ; Myanmar
Myanmar_Ext_A ; Myanmar_Extended_A
Myanmar_Ext_B ; Myanmar_Extended_B
+Myanmar_Ext_C ; Myanmar_Extended_C
Nabataean ; Nabataean
Nag_Mundari ; Nag_Mundari
Nandinagari ; Nandinagari
@@ -272,6 +273,7 @@ Soyombo ; Soyombo
Specials ; Specials
Sundanese ; Sundanese
Sundanese_Sup ; Sundanese_Supplement
+Sunuwar ; Sunuwar
Super_And_Sub ; Superscripts_And_Subscripts
Sup_Arrows_A ; Supplemental_Arrows_A
Sup_Arrows_B ; Supplemental_Arrows_B
@@ -308,6 +310,7 @@ Thai ; Thai
Tibetan ; Tibetan
Tifinagh ; Tifinagh
Tirhuta ; Tirhuta
+Todhri ; Todhri
Toto ; Toto
Transport_And_Map ; Transport_And_Map_Symbols
Ugaritic ; Ugaritic
diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt
index 511f0967b4..8ee8762b36 100644
--- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt
+++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt
@@ -405,12 +405,30 @@ Let $identifier_extend = [\p{GC=Mn}\p{GC=Mc}\p{GC=Nd}\p{GC=Pc}]
In \P{U-1:GC=Cn} ccc=U-1:ccc
# Canonical decompositions (minus exclusions) must be identical across releases (also required by strong normalization stability),
-# except where a character and at lease one character in its decomposition are both new in the release.
-Let $NFC_Exceptions = [\U0001109A\U0001109C\U000110AB[\U0001112E\U0001112F \U0001134B-\U0001134C \U000114BB-\U000114BC \U000114BE \U000115BA-\U000115BB] \U00011938]
-# 6.1.0 Added CHAKMA VOWEL SIGN O..CHAKMA VOWEL SIGN AU
-# 7.0 Added 1134B..1134C, 114BB..114BC, 114BE, and 115BA..115BB
-# 13.0 Added 11938 DIVES AKURU VOWEL SIGN O
-[\p{Decomposition_Type=Canonical} - \p{Full_Composition_Exclusion} - $NFC_Exceptions] = [\p{U-1:Decomposition_Type=Canonical} - \p{U-1:Full_Composition_Exclusion} - $NFC_Exceptions]
+# except where a character and at least one character in its decomposition are both new in the release.
+Let $New_Decompositions = [[\p{Decomposition_Type=Canonical} - \p{Full_Composition_Exclusion}] - [\p{U-1:Decomposition_Type=Canonical} - \p{U-1:Full_Composition_Exclusion}]]
+$New_Decompositions ⊆ \p{U-1:GC=Cn}
+# Keeping only the characters of the current NFD that were already assigned in
+# U-1 must change it, i.e., the decomposition contains newly-assigned characters.
+In $New_Decompositions toNFD * \P{U-1:GC=Cn} ≠ toNFD
+
+Let $Unicode_13_Decompositions = [[\p{U13.0.0:Decomposition_Type=Canonical} - \p{U13.0.0:Full_Composition_Exclusion}] - [\p{U12.1.0:Decomposition_Type=Canonical} - \p{U12.1.0:Full_Composition_Exclusion}]]
+$Unicode_13_Decompositions ⊆ \p{U12.1.0:GC=Cn}
+In $Unicode_13_Decompositions toNFD * \P{U12.1.0:GC=Cn} ≠ toNFD
+$Unicode_13_Decompositions = [\U00011938]
+$Unicode_13_Decompositions = [\p{Name=DIVES AKURU VOWEL SIGN O}]
+
+Let $Unicode_7_Decompositions = [[\p{U7.0.0:Decomposition_Type=Canonical} - \p{U7.0.0:Full_Composition_Exclusion}] - [\p{U6.3.0:Decomposition_Type=Canonical} - \p{U6.3.0:Full_Composition_Exclusion}]]
+$Unicode_7_Decompositions ⊆ \p{U6.3.0:GC=Cn}
+In $Unicode_7_Decompositions toNFD * \P{U6.3.0:GC=Cn} ≠ toNFD
+$Unicode_7_Decompositions = [\U0001134B-\U0001134C \U000114BB-\U000114BC \U000114BE \U000115BA-\U000115BB]
+$Unicode_7_Decompositions ⊆ [\p{Name=/^(GRANTHA|TIRHUTA|SIDDHAM) VOWEL SIGN /}]
+
+Let $Unicode_6_1_Decompositions = [[\p{U6.1.0:Decomposition_Type=Canonical} - \p{U6.1.0:Full_Composition_Exclusion}] - [\p{U6.0.0:Decomposition_Type=Canonical} - \p{U6.0.0:Full_Composition_Exclusion}]]
+$Unicode_6_1_Decompositions ⊆ \p{U6.0.0:GC=Cn}
+In $Unicode_6_1_Decompositions toNFD * \P{U6.0.0:GC=Cn} ≠ toNFD
+$Unicode_6_1_Decompositions = [\U0001112E-\U0001112F]
+$Unicode_6_1_Decompositions ⊆ [\p{Name=/^CHAKMA VOWEL SIGN /}]
# Stability: All characters other than those with General_Category property values Spacing_Mark (Mc) and Nonspacing_Mark (Mn) have the Canonical_Combining_Class property value 0.
\p{CCC=0} ⊇ [^ \p{GC=Mc} \p{GC=Mn}]
@@ -512,7 +530,10 @@ Show [\u20b9]
# exceptions. Should such exceptions arise, they can be added to the definition of
# $nonAlphabeticBindus to avoid a failure on this test.
Let $nonAlphabeticBindus = []
-[\p{InSc=Bindu} - $nonAlphabeticBindus - \p{Alphabetic}] = []
+[\p{InSc=Bindu} - \p{Alphabetic}] = $nonAlphabeticBindus
+
+Let $nonAlphabeticDependentVowels = [\N{ORIYA SIGN OVERLINE}\N{THAI CHARACTER MAITAIKHU}\N{LIMBU SIGN KEMPHRENG}\N{SHARADA VOWEL MODIFIER MARK}\N{SHARADA EXTRA SHORT VOWEL MARK}]
+[\p{InSC=Vowel_Dependent} - \p{Alphabetic}] = $nonAlphabeticDependentVowels
##########################
# LineBreak property