From 8046abb15e2fec1aa42df83204b97f829833de5f Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 18:56:41 +0200 Subject: [PATCH 01/24] Allow for UCA and security failures on repertoire PRs --- .github/workflows/cli-build-instructions.yml | 44 +++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cli-build-instructions.yml b/.github/workflows/cli-build-instructions.yml index c818d6948..3f05c4c83 100644 --- a/.github/workflows/cli-build-instructions.yml +++ b/.github/workflows/cli-build-instructions.yml @@ -275,6 +275,21 @@ jobs: with: repository: unicode-org/unicodetools path: unicodetools/mine/src + - name: Checkout base UnicodeData.txt + if: ${{ github.event_name == 'pull_request'}} + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.base.sha }} + path: base + sparse-checkout: unicodetools/data/ucd/dev/UnicodeData.txt + - name: Compare repertoire + if: ${{ github.event_name == 'pull_request'}} + run: | + # Look for changes affecting the first two fields of UnicodeData.txt (code point and name). + sed 's/^\([^;]*;[^;]*\);.*$/\1/' unicodetools/mine/src/unicodetools/data/ucd/dev/UnicodeData.txt > merged-repertoire.txt + sed 's/^\([^;]*;[^;]*\);.*$/\1/' base/unicodetools/data/ucd/dev/UnicodeData.txt > base-repertoire.txt + diff base-repertoire.txt merged-repertoire.txt + REPERTOIRE_CHANGED = $? - name: Get the CLDR_REF from pom.xml id: cldr_ref run: echo "CLDR_REF="$(mvn --file unicodetools/mine/src/pom.xml help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d- -f3) >> $GITHUB_OUTPUT && cat ${GITHUB_OUTPUT} @@ -320,6 +335,10 @@ jobs: mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCA.Main" -Dexec.args="writeCollationValidityLog ICU" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION # check for output file compgen -G "../Generated/UCA/*/CheckCollationValidity.html" + if $REPERTOIRE_CHANGED + then exit 0 + else exit $? + fi env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -333,6 +352,21 @@ jobs: with: repository: unicode-org/unicodetools path: unicodetools/mine/src + - name: Checkout base UnicodeData.txt + if: ${{ github.event_name == 'pull_request'}} + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.base.sha }} + path: base + sparse-checkout: unicodetools/data/ucd/dev/UnicodeData.txt + - name: Compare repertoire + if: ${{ github.event_name == 'pull_request'}} + run: | + # Look for changes affecting the first two fields of UnicodeData.txt (code point and name). + sed 's/^\([^;]*;[^;]*\);.*$/\1/' unicodetools/mine/src/unicodetools/data/ucd/dev/UnicodeData.txt > merged-repertoire.txt + sed 's/^\([^;]*;[^;]*\);.*$/\1/' base/unicodetools/data/ucd/dev/UnicodeData.txt > base-repertoire.txt + diff base-repertoire.txt merged-repertoire.txt + REPERTOIRE_CHANGED = $? - name: Get the CLDR_REF from pom.xml id: cldr_ref run: echo "CLDR_REF="$(mvn --file unicodetools/mine/src/pom.xml help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d- -f3) >> $GITHUB_OUTPUT && cat ${GITHUB_OUTPUT} @@ -372,6 +406,14 @@ jobs: - name: Run invariant tests run: | cd unicodetools/mine/src - MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DfailIfNoTests=false -DEMIT_GITHUB_ERRORS + if $REPERTOIRE_CHANGED + then $ERROR = "::error" + else $ERROR = "::notice" + fi + MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DfailIfNoTests=false -DEMIT_GITHUB_ERRORS | sed "s/^::error/$ERROR/" + if $REPERTOIRE_CHANGED + then exit 0 + else exit $? + fi env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From ff3a3b856cfdf80ec488f7f4d8c76c508078ebb6 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 20:51:26 +0200 Subject: [PATCH 02/24] use $GITHUB_ENV --- .github/workflows/cli-build-instructions.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cli-build-instructions.yml b/.github/workflows/cli-build-instructions.yml index 3f05c4c83..84e5be898 100644 --- a/.github/workflows/cli-build-instructions.yml +++ b/.github/workflows/cli-build-instructions.yml @@ -366,7 +366,7 @@ jobs: sed 's/^\([^;]*;[^;]*\);.*$/\1/' unicodetools/mine/src/unicodetools/data/ucd/dev/UnicodeData.txt > merged-repertoire.txt sed 's/^\([^;]*;[^;]*\);.*$/\1/' base/unicodetools/data/ucd/dev/UnicodeData.txt > base-repertoire.txt diff base-repertoire.txt merged-repertoire.txt - REPERTOIRE_CHANGED = $? + echo "REPERTOIRE_CHANGED=$?" >> "$GITHUB_ENV" - name: Get the CLDR_REF from pom.xml id: cldr_ref run: echo "CLDR_REF="$(mvn --file unicodetools/mine/src/pom.xml help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d- -f3) >> $GITHUB_OUTPUT && cat ${GITHUB_OUTPUT} From 39739a7edd56889653b4fa5083354d83abf5dee0 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 21:03:24 +0200 Subject: [PATCH 03/24] someday I will learn bash syntax --- .github/workflows/cli-build-instructions.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cli-build-instructions.yml b/.github/workflows/cli-build-instructions.yml index 84e5be898..38e2d2ccf 100644 --- a/.github/workflows/cli-build-instructions.yml +++ b/.github/workflows/cli-build-instructions.yml @@ -289,7 +289,7 @@ jobs: sed 's/^\([^;]*;[^;]*\);.*$/\1/' unicodetools/mine/src/unicodetools/data/ucd/dev/UnicodeData.txt > merged-repertoire.txt sed 's/^\([^;]*;[^;]*\);.*$/\1/' base/unicodetools/data/ucd/dev/UnicodeData.txt > base-repertoire.txt diff base-repertoire.txt merged-repertoire.txt - REPERTOIRE_CHANGED = $? + echo "REPERTOIRE_CHANGED=$?" >> "$GITHUB_ENV" - name: Get the CLDR_REF from pom.xml id: cldr_ref run: echo "CLDR_REF="$(mvn --file unicodetools/mine/src/pom.xml help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d- -f3) >> $GITHUB_OUTPUT && cat ${GITHUB_OUTPUT} @@ -407,8 +407,8 @@ jobs: run: | cd unicodetools/mine/src if $REPERTOIRE_CHANGED - then $ERROR = "::error" - else $ERROR = "::notice" + then ERROR="::error" + else ERROR="::notice" fi MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DfailIfNoTests=false -DEMIT_GITHUB_ERRORS | sed "s/^::error/$ERROR/" if $REPERTOIRE_CHANGED From e031fc6f58d275e3ada7b36353de89a957a54917 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 21:08:47 +0200 Subject: [PATCH 04/24] bash my head against the shell --- .github/workflows/cli-build-instructions.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cli-build-instructions.yml b/.github/workflows/cli-build-instructions.yml index 38e2d2ccf..0cc251349 100644 --- a/.github/workflows/cli-build-instructions.yml +++ b/.github/workflows/cli-build-instructions.yml @@ -335,7 +335,7 @@ jobs: mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCA.Main" -Dexec.args="writeCollationValidityLog ICU" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION # check for output file compgen -G "../Generated/UCA/*/CheckCollationValidity.html" - if $REPERTOIRE_CHANGED + if [[$REPERTOIRE_CHANGED]] then exit 0 else exit $? fi @@ -406,12 +406,12 @@ jobs: - name: Run invariant tests run: | cd unicodetools/mine/src - if $REPERTOIRE_CHANGED + if [[$REPERTOIRE_CHANGED]] then ERROR="::error" else ERROR="::notice" fi MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DfailIfNoTests=false -DEMIT_GITHUB_ERRORS | sed "s/^::error/$ERROR/" - if $REPERTOIRE_CHANGED + if [[$REPERTOIRE_CHANGED]] then exit 0 else exit $? fi From 140ab5c328e4c73ba56207db3d637b476843ef1a Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 21:31:16 +0200 Subject: [PATCH 05/24] blaargh --- .github/workflows/cli-build-instructions.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cli-build-instructions.yml b/.github/workflows/cli-build-instructions.yml index 0cc251349..1b591e9d3 100644 --- a/.github/workflows/cli-build-instructions.yml +++ b/.github/workflows/cli-build-instructions.yml @@ -335,7 +335,7 @@ jobs: mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCA.Main" -Dexec.args="writeCollationValidityLog ICU" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION # check for output file compgen -G "../Generated/UCA/*/CheckCollationValidity.html" - if [[$REPERTOIRE_CHANGED]] + if [[ $REPERTOIRE_CHANGED -ne 0 ]] then exit 0 else exit $? fi @@ -406,12 +406,12 @@ jobs: - name: Run invariant tests run: | cd unicodetools/mine/src - if [[$REPERTOIRE_CHANGED]] + if [[ $REPERTOIRE_CHANGED -ne 0 ]] then ERROR="::error" else ERROR="::notice" fi MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DfailIfNoTests=false -DEMIT_GITHUB_ERRORS | sed "s/^::error/$ERROR/" - if [[$REPERTOIRE_CHANGED]] + if [[ $REPERTOIRE_CHANGED -ne 0 ]] then exit 0 else exit $? fi From 5b27532554256e7d5821bde0838a6fd09b3b1792 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 21:41:31 +0200 Subject: [PATCH 06/24] meow --- .github/workflows/cli-build-instructions.yml | 22 ++------------------ 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/.github/workflows/cli-build-instructions.yml b/.github/workflows/cli-build-instructions.yml index 1b591e9d3..95cdeba90 100644 --- a/.github/workflows/cli-build-instructions.yml +++ b/.github/workflows/cli-build-instructions.yml @@ -275,21 +275,6 @@ jobs: with: repository: unicode-org/unicodetools path: unicodetools/mine/src - - name: Checkout base UnicodeData.txt - if: ${{ github.event_name == 'pull_request'}} - uses: actions/checkout@v3 - with: - ref: ${{ github.event.pull_request.base.sha }} - path: base - sparse-checkout: unicodetools/data/ucd/dev/UnicodeData.txt - - name: Compare repertoire - if: ${{ github.event_name == 'pull_request'}} - run: | - # Look for changes affecting the first two fields of UnicodeData.txt (code point and name). - sed 's/^\([^;]*;[^;]*\);.*$/\1/' unicodetools/mine/src/unicodetools/data/ucd/dev/UnicodeData.txt > merged-repertoire.txt - sed 's/^\([^;]*;[^;]*\);.*$/\1/' base/unicodetools/data/ucd/dev/UnicodeData.txt > base-repertoire.txt - diff base-repertoire.txt merged-repertoire.txt - echo "REPERTOIRE_CHANGED=$?" >> "$GITHUB_ENV" - name: Get the CLDR_REF from pom.xml id: cldr_ref run: echo "CLDR_REF="$(mvn --file unicodetools/mine/src/pom.xml help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d- -f3) >> $GITHUB_OUTPUT && cat ${GITHUB_OUTPUT} @@ -335,10 +320,6 @@ jobs: mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCA.Main" -Dexec.args="writeCollationValidityLog ICU" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION # check for output file compgen -G "../Generated/UCA/*/CheckCollationValidity.html" - if [[ $REPERTOIRE_CHANGED -ne 0 ]] - then exit 0 - else exit $? - fi env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} @@ -411,9 +392,10 @@ jobs: else ERROR="::notice" fi MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DfailIfNoTests=false -DEMIT_GITHUB_ERRORS | sed "s/^::error/$ERROR/" + STATUS=$? if [[ $REPERTOIRE_CHANGED -ne 0 ]] then exit 0 - else exit $? + else exit $STATUS fi env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 75d352c8ca18d19a6fc4179850b724bce704f505 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 21:51:03 +0200 Subject: [PATCH 07/24] break the security tests --- .../resources/org/unicode/text/UCD/SecurityInvariantTest.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/SecurityInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/SecurityInvariantTest.txt index 838516704..e0031ce68 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/SecurityInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/SecurityInvariantTest.txt @@ -10,7 +10,7 @@ # “Multiple values are not assigned to characters with strong restrictions: # Not_Character, Deprecated, Default_Ignorable, Not_NFKC.” -\p{Identifier_Type=Deprecated} = \p{Deprecated} +\p{Identifier_Type=Deprecated} = [\p{Deprecated}a] # When a code point fits the criteria for multiple restrictions, then the strongest one wins. # For example, Default_Ignorable is trumped by unassigned and Deprecated. From f058339344fada639959e8ecb9e83b636e1d1d88 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 22:20:47 +0200 Subject: [PATCH 08/24] PIPESTATUS --- .github/workflows/cli-build-instructions.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cli-build-instructions.yml b/.github/workflows/cli-build-instructions.yml index 95cdeba90..db89ddd11 100644 --- a/.github/workflows/cli-build-instructions.yml +++ b/.github/workflows/cli-build-instructions.yml @@ -392,7 +392,7 @@ jobs: else ERROR="::notice" fi MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DfailIfNoTests=false -DEMIT_GITHUB_ERRORS | sed "s/^::error/$ERROR/" - STATUS=$? + STATUS=${PIPESTATUS[0]} if [[ $REPERTOIRE_CHANGED -ne 0 ]] then exit 0 else exit $STATUS From a5cffbb2e69be832f6356d15b5d93be10edd4d43 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 22:29:58 +0200 Subject: [PATCH 09/24] change the repertoire --- unicodetools/data/ucd/dev/UnicodeData.txt | 2 +- unicodetools/data/ucd/dev/extracted/DerivedName.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 64258a373..4ba1f4a47 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -23589,7 +23589,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 130DD;EGYPTIAN HIEROGLYPH E010;Lo;0;L;;;;;N;;;;; 130DE;EGYPTIAN HIEROGLYPH E011;Lo;0;L;;;;;N;;;;; 130DF;EGYPTIAN HIEROGLYPH E012;Lo;0;L;;;;;N;;;;; -130E0;EGYPTIAN HIEROGLYPH E013;Lo;0;L;;;;;N;;;;; +130E0;MEOW;Lo;0;L;;;;;N;;;;; 130E1;EGYPTIAN HIEROGLYPH E014;Lo;0;L;;;;;N;;;;; 130E2;EGYPTIAN HIEROGLYPH E015;Lo;0;L;;;;;N;;;;; 130E3;EGYPTIAN HIEROGLYPH E016;Lo;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 07b0176b5..71d99a145 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -34254,7 +34254,7 @@ FFFD ; REPLACEMENT CHARACTER 130DD ; EGYPTIAN HIEROGLYPH E010 130DE ; EGYPTIAN HIEROGLYPH E011 130DF ; EGYPTIAN HIEROGLYPH E012 -130E0 ; EGYPTIAN HIEROGLYPH E013 +130E0 ; MEOW 130E1 ; EGYPTIAN HIEROGLYPH E014 130E2 ; EGYPTIAN HIEROGLYPH E015 130E3 ; EGYPTIAN HIEROGLYPH E016 From 368fe375b4a3caa900ef96758e6af2487745d900 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 22:38:56 +0200 Subject: [PATCH 10/24] exit 0 --- .github/workflows/cli-build-instructions.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/cli-build-instructions.yml b/.github/workflows/cli-build-instructions.yml index db89ddd11..19e937ce9 100644 --- a/.github/workflows/cli-build-instructions.yml +++ b/.github/workflows/cli-build-instructions.yml @@ -348,6 +348,7 @@ jobs: sed 's/^\([^;]*;[^;]*\);.*$/\1/' base/unicodetools/data/ucd/dev/UnicodeData.txt > base-repertoire.txt diff base-repertoire.txt merged-repertoire.txt echo "REPERTOIRE_CHANGED=$?" >> "$GITHUB_ENV" + exit 0 - name: Get the CLDR_REF from pom.xml id: cldr_ref run: echo "CLDR_REF="$(mvn --file unicodetools/mine/src/pom.xml help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d- -f3) >> $GITHUB_OUTPUT && cat ${GITHUB_OUTPUT} From 25dec81634c9f90b003d0fe69ba749d2505b2e67 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 22:44:25 +0200 Subject: [PATCH 11/24] () --- .github/workflows/cli-build-instructions.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/cli-build-instructions.yml b/.github/workflows/cli-build-instructions.yml index 19e937ce9..45cf0748f 100644 --- a/.github/workflows/cli-build-instructions.yml +++ b/.github/workflows/cli-build-instructions.yml @@ -346,9 +346,8 @@ jobs: # Look for changes affecting the first two fields of UnicodeData.txt (code point and name). sed 's/^\([^;]*;[^;]*\);.*$/\1/' unicodetools/mine/src/unicodetools/data/ucd/dev/UnicodeData.txt > merged-repertoire.txt sed 's/^\([^;]*;[^;]*\);.*$/\1/' base/unicodetools/data/ucd/dev/UnicodeData.txt > base-repertoire.txt - diff base-repertoire.txt merged-repertoire.txt + (diff base-repertoire.txt merged-repertoire.txt) echo "REPERTOIRE_CHANGED=$?" >> "$GITHUB_ENV" - exit 0 - name: Get the CLDR_REF from pom.xml id: cldr_ref run: echo "CLDR_REF="$(mvn --file unicodetools/mine/src/pom.xml help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d- -f3) >> $GITHUB_OUTPUT && cat ${GITHUB_OUTPUT} From 03a4395b568b47ae82904887c1e2be7ec2243a22 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 22:47:27 +0200 Subject: [PATCH 12/24] gaaah --- .github/workflows/cli-build-instructions.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cli-build-instructions.yml b/.github/workflows/cli-build-instructions.yml index 45cf0748f..94f4bae78 100644 --- a/.github/workflows/cli-build-instructions.yml +++ b/.github/workflows/cli-build-instructions.yml @@ -346,8 +346,8 @@ jobs: # Look for changes affecting the first two fields of UnicodeData.txt (code point and name). sed 's/^\([^;]*;[^;]*\);.*$/\1/' unicodetools/mine/src/unicodetools/data/ucd/dev/UnicodeData.txt > merged-repertoire.txt sed 's/^\([^;]*;[^;]*\);.*$/\1/' base/unicodetools/data/ucd/dev/UnicodeData.txt > base-repertoire.txt - (diff base-repertoire.txt merged-repertoire.txt) - echo "REPERTOIRE_CHANGED=$?" >> "$GITHUB_ENV" + DIFF=diff base-repertoire.txt merged-repertoire.txt + echo "REPERTOIRE_CHANGED=$DIFF" >> "$GITHUB_ENV" - name: Get the CLDR_REF from pom.xml id: cldr_ref run: echo "CLDR_REF="$(mvn --file unicodetools/mine/src/pom.xml help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d- -f3) >> $GITHUB_OUTPUT && cat ${GITHUB_OUTPUT} From ee872c16c69a37a9164524053eec44e3bd2906a9 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 22:56:57 +0200 Subject: [PATCH 13/24] aoeuaosenthu --- .github/workflows/cli-build-instructions.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cli-build-instructions.yml b/.github/workflows/cli-build-instructions.yml index 94f4bae78..6f4f4737a 100644 --- a/.github/workflows/cli-build-instructions.yml +++ b/.github/workflows/cli-build-instructions.yml @@ -346,7 +346,7 @@ jobs: # Look for changes affecting the first two fields of UnicodeData.txt (code point and name). sed 's/^\([^;]*;[^;]*\);.*$/\1/' unicodetools/mine/src/unicodetools/data/ucd/dev/UnicodeData.txt > merged-repertoire.txt sed 's/^\([^;]*;[^;]*\);.*$/\1/' base/unicodetools/data/ucd/dev/UnicodeData.txt > base-repertoire.txt - DIFF=diff base-repertoire.txt merged-repertoire.txt + DIFF=(diff base-repertoire.txt merged-repertoire.txt) echo "REPERTOIRE_CHANGED=$DIFF" >> "$GITHUB_ENV" - name: Get the CLDR_REF from pom.xml id: cldr_ref From 5e504b2fe35d5cb03a3e710e88be50aff4ec0875 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 23:01:30 +0200 Subject: [PATCH 14/24] plumbing --- .github/workflows/cli-build-instructions.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cli-build-instructions.yml b/.github/workflows/cli-build-instructions.yml index 6f4f4737a..d9e17280f 100644 --- a/.github/workflows/cli-build-instructions.yml +++ b/.github/workflows/cli-build-instructions.yml @@ -391,7 +391,8 @@ jobs: then ERROR="::error" else ERROR="::notice" fi - MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DfailIfNoTests=false -DEMIT_GITHUB_ERRORS | sed "s/^::error/$ERROR/" + set +o pipefail + MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DfailIfNoTests=false -DEMIT_GITHUB_ERRORS 2>&1 | sed "s/^::error/$ERROR/" STATUS=${PIPESTATUS[0]} if [[ $REPERTOIRE_CHANGED -ne 0 ]] then exit 0 From d6290001ac8a2c7808bd09d9ed3fa5bd3727dc3e Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 23:03:24 +0200 Subject: [PATCH 15/24] logic is hard --- .github/workflows/cli-build-instructions.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cli-build-instructions.yml b/.github/workflows/cli-build-instructions.yml index d9e17280f..2a9643b29 100644 --- a/.github/workflows/cli-build-instructions.yml +++ b/.github/workflows/cli-build-instructions.yml @@ -388,11 +388,10 @@ jobs: run: | cd unicodetools/mine/src if [[ $REPERTOIRE_CHANGED -ne 0 ]] - then ERROR="::error" - else ERROR="::notice" + then ERROR="::notice" + else ERROR="::error" fi - set +o pipefail - MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DfailIfNoTests=false -DEMIT_GITHUB_ERRORS 2>&1 | sed "s/^::error/$ERROR/" + MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DfailIfNoTests=false -DEMIT_GITHUB_ERRORS | sed "s/^::error/$ERROR/" STATUS=${PIPESTATUS[0]} if [[ $REPERTOIRE_CHANGED -ne 0 ]] then exit 0 From aca41bce74220afa008b060f18f972e7d5336225 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 23:20:30 +0200 Subject: [PATCH 16/24] traces --- .github/workflows/cli-build-instructions.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cli-build-instructions.yml b/.github/workflows/cli-build-instructions.yml index 2a9643b29..e7127ad49 100644 --- a/.github/workflows/cli-build-instructions.yml +++ b/.github/workflows/cli-build-instructions.yml @@ -347,6 +347,7 @@ jobs: sed 's/^\([^;]*;[^;]*\);.*$/\1/' unicodetools/mine/src/unicodetools/data/ucd/dev/UnicodeData.txt > merged-repertoire.txt sed 's/^\([^;]*;[^;]*\);.*$/\1/' base/unicodetools/data/ucd/dev/UnicodeData.txt > base-repertoire.txt DIFF=(diff base-repertoire.txt merged-repertoire.txt) + echo "REPERTOIRE_CHANGED=$DIFF" echo "REPERTOIRE_CHANGED=$DIFF" >> "$GITHUB_ENV" - name: Get the CLDR_REF from pom.xml id: cldr_ref @@ -387,11 +388,12 @@ jobs: - name: Run invariant tests run: | cd unicodetools/mine/src + echo "REPERTOIRE_CHANGED=$REPERTOIRE_CHANGED" if [[ $REPERTOIRE_CHANGED -ne 0 ]] then ERROR="::notice" else ERROR="::error" fi - MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DfailIfNoTests=false -DEMIT_GITHUB_ERRORS | sed "s/^::error/$ERROR/" + MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DfailIfNoTests=false -DEMIT_GITHUB_ERRORS 2>&1 | sed "s/^::error/$ERROR/" STATUS=${PIPESTATUS[0]} if [[ $REPERTOIRE_CHANGED -ne 0 ]] then exit 0 From c0af5448396efbd7f39725057bf9333ce5008ebc Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 23:24:56 +0200 Subject: [PATCH 17/24] gaaaah --- .github/workflows/cli-build-instructions.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/cli-build-instructions.yml b/.github/workflows/cli-build-instructions.yml index e7127ad49..912e5002d 100644 --- a/.github/workflows/cli-build-instructions.yml +++ b/.github/workflows/cli-build-instructions.yml @@ -346,9 +346,9 @@ jobs: # Look for changes affecting the first two fields of UnicodeData.txt (code point and name). sed 's/^\([^;]*;[^;]*\);.*$/\1/' unicodetools/mine/src/unicodetools/data/ucd/dev/UnicodeData.txt > merged-repertoire.txt sed 's/^\([^;]*;[^;]*\);.*$/\1/' base/unicodetools/data/ucd/dev/UnicodeData.txt > base-repertoire.txt - DIFF=(diff base-repertoire.txt merged-repertoire.txt) - echo "REPERTOIRE_CHANGED=$DIFF" - echo "REPERTOIRE_CHANGED=$DIFF" >> "$GITHUB_ENV" + set +e + diff base-repertoire.txt merged-repertoire.txt + echo "REPERTOIRE_CHANGED=$?" >> "$GITHUB_ENV" - name: Get the CLDR_REF from pom.xml id: cldr_ref run: echo "CLDR_REF="$(mvn --file unicodetools/mine/src/pom.xml help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d- -f3) >> $GITHUB_OUTPUT && cat ${GITHUB_OUTPUT} From 05ed720a1078989c9801e1cde416af6d1fbd2164 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 23:41:54 +0200 Subject: [PATCH 18/24] UCA too --- .github/workflows/cli-build-instructions.yml | 24 ++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cli-build-instructions.yml b/.github/workflows/cli-build-instructions.yml index 912e5002d..a3a12dc65 100644 --- a/.github/workflows/cli-build-instructions.yml +++ b/.github/workflows/cli-build-instructions.yml @@ -275,6 +275,22 @@ jobs: with: repository: unicode-org/unicodetools path: unicodetools/mine/src + - name: Checkout base UnicodeData.txt + if: ${{ github.event_name == 'pull_request'}} + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.base.sha }} + path: base + sparse-checkout: unicodetools/data/ucd/dev/UnicodeData.txt + - name: Compare repertoire + if: ${{ github.event_name == 'pull_request'}} + run: | + # Look for changes affecting the first two fields of UnicodeData.txt (code point and name). + sed 's/^\([^;]*;[^;]*\);.*$/\1/' unicodetools/mine/src/unicodetools/data/ucd/dev/UnicodeData.txt > merged-repertoire.txt + sed 's/^\([^;]*;[^;]*\);.*$/\1/' base/unicodetools/data/ucd/dev/UnicodeData.txt > base-repertoire.txt + set +e + diff base-repertoire.txt merged-repertoire.txt + echo "REPERTOIRE_CHANGED=$?" >> "$GITHUB_ENV" - name: Get the CLDR_REF from pom.xml id: cldr_ref run: echo "CLDR_REF="$(mvn --file unicodetools/mine/src/pom.xml help:evaluate -Dexpression=cldr.version -q -DforceStdout | cut -d- -f3) >> $GITHUB_OUTPUT && cat ${GITHUB_OUTPUT} @@ -316,6 +332,10 @@ jobs: - name: Run command - UCA - collation validity log run: | cd unicodetools/mine/src + echo "REPERTOIRE_CHANGED=$REPERTOIRE_CHANGED" + if [[ ${REPERTOIRE_CHANGED:-0} -ne 0 ]] + then set +e + fi # invoke main() in class ...UCA.Main mvn -s .github/workflows/mvn-settings.xml compile exec:java -Dexec.mainClass="org.unicode.text.UCA.Main" -Dexec.args="writeCollationValidityLog ICU" -am -pl unicodetools -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION # check for output file @@ -389,13 +409,13 @@ jobs: run: | cd unicodetools/mine/src echo "REPERTOIRE_CHANGED=$REPERTOIRE_CHANGED" - if [[ $REPERTOIRE_CHANGED -ne 0 ]] + if [[ ${REPERTOIRE_CHANGED:-0} -ne 0 ]] then ERROR="::notice" else ERROR="::error" fi MAVEN_OPTS="-ea" mvn -s .github/workflows/mvn-settings.xml test -am -pl unicodetools -Dtest=TestTestUnicodeInvariants#testSecurityInvariants -DCLDR_DIR=$(cd ../../../cldr/mine/src ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) -DUVERSION=$CURRENT_UVERSION -DfailIfNoTests=false -DEMIT_GITHUB_ERRORS 2>&1 | sed "s/^::error/$ERROR/" STATUS=${PIPESTATUS[0]} - if [[ $REPERTOIRE_CHANGED -ne 0 ]] + if [[ ${REPERTOIRE_CHANGED:-0} -ne 0 ]] then exit 0 else exit $STATUS fi From 9bd67946d9ec4798ce5b02697eaab46608c8ba27 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 5 Jun 2024 23:49:35 +0200 Subject: [PATCH 19/24] Revert "mind the gap" This reverts commit 6c2ff025a0c416940c679421220d1534d9943c47. --- .../java/org/unicode/text/UCA/PrimariesToFractional.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCA/PrimariesToFractional.java b/unicodetools/src/main/java/org/unicode/text/UCA/PrimariesToFractional.java index 34ef90337..334f35fa6 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCA/PrimariesToFractional.java +++ b/unicodetools/src/main/java/org/unicode/text/UCA/PrimariesToFractional.java @@ -724,11 +724,11 @@ public PrimariesToFractional(UCA uca) { setOptionsForScript(UCD_Types.Vithkuqi).noTwoBytePrimariesIfVariants(); // Extinct script, use three-byte primaries for the few characters with variants. setOptionsForScript(UCD_Types.Elymaic).noTwoBytePrimariesIfVariants(); - // Large Excluded Script, minimal gaps. - setOptionsForScript(UCD_Types.Egyptian_Hieroglyphs).newByte().minimalGap3(); + // Minor script, avoid lead byte overflow. + setOptionsForScript(UCD_Types.Egyptian_Hieroglyphs).newByte(); // Register the scripts as aliases. setOptionsForScripts(UCD_Types.Meroitic_Cursive, UCD_Types.Meroitic_Hieroglyphs); - // Large Excluded Script, minimal gaps. + // Larged Excluded Script, minimal gaps. setOptionsForScripts(UCD_Types.Tangut).minimalGap3(); // Han uses many lead bytes, so that tailoring tens of thousands of characters // can use many two-byte primaries. From 2c1c296ed470362cc2cc13ef714cb799d521113e Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 6 Jun 2024 00:46:15 +0200 Subject: [PATCH 20/24] A better repertoire change --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 +-- .../data/ucd/dev/DerivedCoreProperties.txt | 34 +++++++++---------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 4 +-- unicodetools/data/ucd/dev/LineBreak.txt | 4 +-- unicodetools/data/ucd/dev/Scripts.txt | 6 ++-- unicodetools/data/ucd/dev/UnicodeData.txt | 3 +- .../data/ucd/dev/VerticalOrientation.txt | 4 +-- .../dev/auxiliary/SentenceBreakProperty.txt | 6 ++-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 6 ++-- .../ucd/dev/extracted/DerivedBidiClass.txt | 6 ++-- .../dev/extracted/DerivedCombiningClass.txt | 6 ++-- .../dev/extracted/DerivedEastAsianWidth.txt | 6 ++-- .../dev/extracted/DerivedGeneralCategory.txt | 10 +++--- .../ucd/dev/extracted/DerivedLineBreak.txt | 10 +++--- .../data/ucd/dev/extracted/DerivedName.txt | 7 ++-- 15 files changed, 60 insertions(+), 57 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index b4dcd2e48..940517c3d 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-16.0.0.txt -# Date: 2024-04-30, 21:48:12 GMT +# Date: 2024-06-05, 22:43:48 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2044,6 +2044,7 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 18CFF ; 16.0 # KHITAN SMALL SCRIPT CHARACTER-18CFF 1CC00..1CCF9 ; 16.0 # [250] UP-POINTING GO-KART..OUTLINED DIGIT NINE 1CD00..1CEB3 ; 16.0 # [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET +1DF2B ; 16.0 # LATIN SMALL LETTER MEOW 1E5D0..1E5FA ; 16.0 # [43] OL ONAL LETTER O..OL ONAL DIGIT NINE 1E5FF ; 16.0 # OL ONAL ABBREVIATION SIGN 1F8B2..1F8BB ; 16.0 # [10] RIGHTWARDS ARROW WITH LOWER HOOK..SOUTH WEST ARROW FROM BAR @@ -2057,6 +2058,6 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 1FAE9 ; 16.0 # FACE WITH BAGS UNDER EYES 1FBCB..1FBEF ; 16.0 # [37] WHITE CROSS MARK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 5185 +# Total code points: 5186 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 88df29b3b..661604f76 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2024-05-02, 15:02:37 GMT +# Date: 2024-06-05, 22:44:14 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1370,7 +1370,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 1DF00..1DF09 ; Alphabetic # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; Alphabetic # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; Alphabetic # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2A ; Alphabetic # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF25..1DF2B ; Alphabetic # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW 1E000..1E006 ; Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; Alphabetic # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -1441,7 +1441,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142759 +# Total code points: 142760 # ================================================ @@ -2135,11 +2135,11 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1D7CB ; Lowercase # L& MATHEMATICAL BOLD SMALL DIGAMMA 1DF00..1DF09 ; Lowercase # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF1E ; Lowercase # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2A ; Lowercase # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF25..1DF2B ; Lowercase # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW 1E030..1E06D ; Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lowercase # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2569 +# Total code points: 2570 # ================================================ @@ -2981,14 +2981,14 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1D7C4..1D7CB ; Cased # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1DF00..1DF09 ; Cased # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF1E ; Cased # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2A ; Cased # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF25..1DF2B ; Cased # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW 1E030..1E06D ; Cased # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E900..1E943 ; Cased # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA 1F130..1F149 ; Cased # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z 1F150..1F169 ; Cased # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Cased # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 4578 +# Total code points: 4579 # ================================================ @@ -6901,7 +6901,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1DF00..1DF09 ; ID_Start # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; ID_Start # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; ID_Start # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2A ; ID_Start # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF25..1DF2B ; ID_Start # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW 1E030..1E06D ; ID_Start # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; ID_Start # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; ID_Start # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -6962,7 +6962,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141269 +# Total code points: 141270 # ================================================ @@ -8289,7 +8289,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 1DF00..1DF09 ; ID_Continue # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; ID_Continue # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; ID_Continue # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2A ; ID_Continue # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF25..1DF2B ; ID_Continue # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW 1E000..1E006 ; ID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; ID_Continue # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; ID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -8370,7 +8370,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144541 +# Total code points: 144542 # ================================================ @@ -9087,7 +9087,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1DF00..1DF09 ; XID_Start # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; XID_Start # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; XID_Start # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2A ; XID_Start # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF25..1DF2B ; XID_Start # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW 1E030..1E06D ; XID_Start # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; XID_Start # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; XID_Start # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -9148,7 +9148,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141246 +# Total code points: 141247 # ================================================ @@ -10476,7 +10476,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 1DF00..1DF09 ; XID_Continue # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; XID_Continue # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; XID_Continue # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2A ; XID_Continue # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF25..1DF2B ; XID_Continue # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW 1E000..1E006 ; XID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; XID_Continue # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; XID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -10557,7 +10557,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144522 +# Total code points: 144523 # ================================================ @@ -12691,7 +12691,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1DF00..1DF09 ; Grapheme_Base # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; Grapheme_Base # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; Grapheme_Base # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2A ; Grapheme_Base # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF25..1DF2B ; Grapheme_Base # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW 1E030..1E06D ; Grapheme_Base # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; Grapheme_Base # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; Grapheme_Base # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -12811,7 +12811,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152738 +# Total code points: 152739 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 99f7a31ea..599de08c7 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-16.0.0.txt -# Date: 2024-04-30, 21:48:20 GMT +# Date: 2024-06-05, 22:44:23 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2467,7 +2467,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 1DF00..1DF09 ; N # Ll [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; N # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; N # Ll [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2A ; N # Ll [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF25..1DF2B ; N # Ll [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW 1E000..1E006 ; N # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; N # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; N # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 9dc61d95d..c411fc91a 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-16.0.0.txt -# Date: 2024-05-11, 16:57:19 GMT +# Date: 2024-06-05, 22:44:24 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3382,7 +3382,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1DF00..1DF09 ; AL # Ll [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; AL # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; AL # Ll [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2A ; AL # Ll [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF25..1DF2B ; AL # Ll [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW 1E000..1E006 ; CM # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; CM # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; CM # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 443a6d2dd..9b7138c83 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-16.0.0.txt -# Date: 2024-04-30, 21:48:40 GMT +# Date: 2024-06-05, 22:45:03 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -700,9 +700,9 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1DF00..1DF09 ; Latin # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; Latin # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; Latin # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2A ; Latin # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF25..1DF2B ; Latin # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW -# Total code points: 1487 +# Total code points: 1488 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 4ba1f4a47..d5bf5bba7 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -23589,7 +23589,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 130DD;EGYPTIAN HIEROGLYPH E010;Lo;0;L;;;;;N;;;;; 130DE;EGYPTIAN HIEROGLYPH E011;Lo;0;L;;;;;N;;;;; 130DF;EGYPTIAN HIEROGLYPH E012;Lo;0;L;;;;;N;;;;; -130E0;MEOW;Lo;0;L;;;;;N;;;;; +130E0;EGYPTIAN HIEROGLYPH E013;Lo;0;L;;;;;N;;;;; 130E1;EGYPTIAN HIEROGLYPH E014;Lo;0;L;;;;;N;;;;; 130E2;EGYPTIAN HIEROGLYPH E015;Lo;0;L;;;;;N;;;;; 130E3;EGYPTIAN HIEROGLYPH E016;Lo;0;L;;;;;N;;;;; @@ -35656,6 +35656,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1DF28;LATIN SMALL LETTER R WITH MID-HEIGHT LEFT HOOK;Ll;0;L;;;;;N;;;;; 1DF29;LATIN SMALL LETTER S WITH MID-HEIGHT LEFT HOOK;Ll;0;L;;;;;N;;;;; 1DF2A;LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK;Ll;0;L;;;;;N;;;;; +1DF2B;LATIN SMALL LETTER MEOW;Ll;0;L;;;;;N;;;;; 1E000;COMBINING GLAGOLITIC LETTER AZU;Mn;230;NSM;;;;;N;;;;; 1E001;COMBINING GLAGOLITIC LETTER BUKY;Mn;230;NSM;;;;;N;;;;; 1E002;COMBINING GLAGOLITIC LETTER VEDE;Mn;230;NSM;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 1ebcd7228..5aafcd6ef 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2024-04-30, 21:48:42 GMT +# Date: 2024-06-05, 22:45:07 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2319,7 +2319,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 1DF00..1DF09 ; R # Ll [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; R # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; R # Ll [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2A ; R # Ll [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF25..1DF2B ; R # Ll [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW 1E000..1E006 ; R # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; R # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; R # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 39fdb57c0..3f75bc98e 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-16.0.0.txt -# Date: 2024-05-13, 20:53:44 GMT +# Date: 2024-06-05, 22:45:04 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1316,11 +1316,11 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1D7CB ; Lower # L& MATHEMATICAL BOLD SMALL DIGAMMA 1DF00..1DF09 ; Lower # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF1E ; Lower # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2A ; Lower # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF25..1DF2B ; Lower # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW 1E030..1E06D ; Lower # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lower # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2522 +# Total code points: 2523 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 925ea3c48..1c19891cb 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-16.0.0.txt -# Date: 2024-04-30, 21:48:43 GMT +# Date: 2024-06-05, 22:45:07 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1300,7 +1300,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1DF00..1DF09 ; ALetter # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; ALetter # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; ALetter # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2A ; ALetter # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF25..1DF2B ; ALetter # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW 1E030..1E06D ; ALetter # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; ALetter # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; ALetter # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -1355,7 +1355,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33791 +# Total code points: 33792 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 2aceac0aa..33137935d 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-16.0.0.txt -# Date: 2024-04-30, 21:48:13 GMT +# Date: 2024-06-05, 22:44:11 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1174,7 +1174,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 1DF00..1DF09 ; L # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; L # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; L # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2A ; L # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF25..1DF2B ; L # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW 1E030..1E06D ; L # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; L # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; L # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -1214,7 +1214,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815351 code points not listed here. +# The above property value applies to 815350 code points not listed here. # Total code points: 1095513 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index a5d57af96..40fd83b91 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-16.0.0.txt -# Date: 2024-04-30, 21:48:15 GMT +# Date: 2024-06-05, 22:44:13 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1935,7 +1935,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1DF00..1DF09 ; 0 # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; 0 # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; 0 # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2A ; 0 # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF25..1DF2B ; 0 # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW 1E030..1E06D ; 0 # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; 0 # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; 0 # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -2060,7 +2060,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821581 code points not listed here. +# The above property value applies to 821580 code points not listed here. # Total code points: 1113178 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index cc1d91aaa..e5c196450 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# Date: 2024-06-05, 22:44:17 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1964,7 +1964,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1DF00..1DF09 ; N # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; N # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; N # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2A ; N # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF25..1DF2B ; N # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW 1E000..1E006 ; N # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; N # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; N # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -2103,7 +2103,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761099 code points not listed here. +# The above property value applies to 761098 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 07bf7bca9..857e5639f 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# Date: 2024-06-05, 22:44:17 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -633,7 +633,7 @@ FFFE..FFFF ; Cn # [2] .. 1DAA0 ; Cn # 1DAB0..1DEFF ; Cn # [1104] .. 1DF1F..1DF24 ; Cn # [6] .. -1DF2B..1DFFF ; Cn # [213] .. +1DF2C..1DFFF ; Cn # [212] .. 1E007 ; Cn # 1E019..1E01A ; Cn # [2] .. 1E022 ; Cn # @@ -747,7 +747,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819533 +# Total code points: 819532 # ================================================ @@ -2071,10 +2071,10 @@ FF41..FF5A ; Ll # [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL 1D7CB ; Ll # MATHEMATICAL BOLD SMALL DIGAMMA 1DF00..1DF09 ; Ll # [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF1E ; Ll # [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2A ; Ll # [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF25..1DF2B ; Ll # [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW 1E922..1E943 ; Ll # [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2258 +# Total code points: 2259 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 866536783..f0ebf40f8 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-16.0.0.txt -# Date: 2024-05-11, 16:57:14 GMT +# Date: 2024-06-05, 22:44:19 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757653 code points not listed here. -# Total code points: 895121 +# The above property value applies to 757652 code points not listed here. +# Total code points: 895120 # ================================================ @@ -1521,7 +1521,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1DF00..1DF09 ; AL # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; AL # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; AL # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2A ; AL # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF25..1DF2B ; AL # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW 1E030..1E06D ; AL # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; AL # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; AL # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -1615,7 +1615,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26690 +# Total code points: 26691 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 71d99a145..b26debb18 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2024-04-30, 21:48:18 GMT +# Date: 2024-06-05, 22:44:20 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -34254,7 +34254,7 @@ FFFD ; REPLACEMENT CHARACTER 130DD ; EGYPTIAN HIEROGLYPH E010 130DE ; EGYPTIAN HIEROGLYPH E011 130DF ; EGYPTIAN HIEROGLYPH E012 -130E0 ; MEOW +130E0 ; EGYPTIAN HIEROGLYPH E013 130E1 ; EGYPTIAN HIEROGLYPH E014 130E2 ; EGYPTIAN HIEROGLYPH E015 130E3 ; EGYPTIAN HIEROGLYPH E016 @@ -41461,6 +41461,7 @@ FFFD ; REPLACEMENT CHARACTER 1DF28 ; LATIN SMALL LETTER R WITH MID-HEIGHT LEFT HOOK 1DF29 ; LATIN SMALL LETTER S WITH MID-HEIGHT LEFT HOOK 1DF2A ; LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK +1DF2B ; LATIN SMALL LETTER MEOW 1E000 ; COMBINING GLAGOLITIC LETTER AZU 1E001 ; COMBINING GLAGOLITIC LETTER BUKY 1E002 ; COMBINING GLAGOLITIC LETTER VEDE @@ -45367,6 +45368,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 154998 +# Total code points: 154999 # EOF From d8f557dcb497f0141223a7ebada2b3a088d42426 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 6 Jun 2024 00:57:46 +0200 Subject: [PATCH 21/24] =?UTF-8?q?Revert=20the=20re=CC=81pertoire=20change?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 ++- .../data/ucd/dev/DerivedCoreProperties.txt | 34 +++++++++---------- unicodetools/data/ucd/dev/EastAsianWidth.txt | 4 +-- unicodetools/data/ucd/dev/LineBreak.txt | 4 +-- unicodetools/data/ucd/dev/Scripts.txt | 6 ++-- unicodetools/data/ucd/dev/UnicodeData.txt | 1 - .../data/ucd/dev/VerticalOrientation.txt | 4 +-- .../dev/auxiliary/SentenceBreakProperty.txt | 6 ++-- .../ucd/dev/auxiliary/WordBreakProperty.txt | 6 ++-- .../ucd/dev/extracted/DerivedBidiClass.txt | 6 ++-- .../dev/extracted/DerivedCombiningClass.txt | 6 ++-- .../dev/extracted/DerivedEastAsianWidth.txt | 6 ++-- .../dev/extracted/DerivedGeneralCategory.txt | 10 +++--- .../ucd/dev/extracted/DerivedLineBreak.txt | 10 +++--- .../data/ucd/dev/extracted/DerivedName.txt | 5 ++- 15 files changed, 55 insertions(+), 58 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index 940517c3d..b4dcd2e48 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-16.0.0.txt -# Date: 2024-06-05, 22:43:48 GMT +# Date: 2024-04-30, 21:48:12 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2044,7 +2044,6 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 18CFF ; 16.0 # KHITAN SMALL SCRIPT CHARACTER-18CFF 1CC00..1CCF9 ; 16.0 # [250] UP-POINTING GO-KART..OUTLINED DIGIT NINE 1CD00..1CEB3 ; 16.0 # [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET -1DF2B ; 16.0 # LATIN SMALL LETTER MEOW 1E5D0..1E5FA ; 16.0 # [43] OL ONAL LETTER O..OL ONAL DIGIT NINE 1E5FF ; 16.0 # OL ONAL ABBREVIATION SIGN 1F8B2..1F8BB ; 16.0 # [10] RIGHTWARDS ARROW WITH LOWER HOOK..SOUTH WEST ARROW FROM BAR @@ -2058,6 +2057,6 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 1FAE9 ; 16.0 # FACE WITH BAGS UNDER EYES 1FBCB..1FBEF ; 16.0 # [37] WHITE CROSS MARK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 5186 +# Total code points: 5185 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 661604f76..88df29b3b 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2024-06-05, 22:44:14 GMT +# Date: 2024-05-02, 15:02:37 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1370,7 +1370,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 1DF00..1DF09 ; Alphabetic # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; Alphabetic # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; Alphabetic # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2B ; Alphabetic # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW +1DF25..1DF2A ; Alphabetic # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E000..1E006 ; Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; Alphabetic # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -1441,7 +1441,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 142760 +# Total code points: 142759 # ================================================ @@ -2135,11 +2135,11 @@ FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1D7CB ; Lowercase # L& MATHEMATICAL BOLD SMALL DIGAMMA 1DF00..1DF09 ; Lowercase # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF1E ; Lowercase # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2B ; Lowercase # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW +1DF25..1DF2A ; Lowercase # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E030..1E06D ; Lowercase # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lowercase # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2570 +# Total code points: 2569 # ================================================ @@ -2981,14 +2981,14 @@ FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1D7C4..1D7CB ; Cased # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA 1DF00..1DF09 ; Cased # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF1E ; Cased # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2B ; Cased # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW +1DF25..1DF2A ; Cased # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E030..1E06D ; Cased # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E900..1E943 ; Cased # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA 1F130..1F149 ; Cased # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z 1F150..1F169 ; Cased # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Cased # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 4579 +# Total code points: 4578 # ================================================ @@ -6901,7 +6901,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1DF00..1DF09 ; ID_Start # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; ID_Start # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; ID_Start # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2B ; ID_Start # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW +1DF25..1DF2A ; ID_Start # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E030..1E06D ; ID_Start # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; ID_Start # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; ID_Start # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -6962,7 +6962,7 @@ FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141270 +# Total code points: 141269 # ================================================ @@ -8289,7 +8289,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 1DF00..1DF09 ; ID_Continue # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; ID_Continue # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; ID_Continue # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2B ; ID_Continue # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW +1DF25..1DF2A ; ID_Continue # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E000..1E006 ; ID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; ID_Continue # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; ID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -8370,7 +8370,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144542 +# Total code points: 144541 # ================================================ @@ -9087,7 +9087,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 1DF00..1DF09 ; XID_Start # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; XID_Start # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; XID_Start # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2B ; XID_Start # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW +1DF25..1DF2A ; XID_Start # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E030..1E06D ; XID_Start # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; XID_Start # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; XID_Start # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -9148,7 +9148,7 @@ FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGU 30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; XID_Start # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 141247 +# Total code points: 141246 # ================================================ @@ -10476,7 +10476,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 1DF00..1DF09 ; XID_Continue # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; XID_Continue # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; XID_Continue # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2B ; XID_Continue # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW +1DF25..1DF2A ; XID_Continue # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E000..1E006 ; XID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; XID_Continue # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; XID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -10557,7 +10557,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144523 +# Total code points: 144522 # ================================================ @@ -12691,7 +12691,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1DF00..1DF09 ; Grapheme_Base # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; Grapheme_Base # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; Grapheme_Base # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2B ; Grapheme_Base # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW +1DF25..1DF2A ; Grapheme_Base # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E030..1E06D ; Grapheme_Base # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; Grapheme_Base # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; Grapheme_Base # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -12811,7 +12811,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152739 +# Total code points: 152738 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 599de08c7..99f7a31ea 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-16.0.0.txt -# Date: 2024-06-05, 22:44:23 GMT +# Date: 2024-04-30, 21:48:20 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2467,7 +2467,7 @@ FFFD ; A # So REPLACEMENT CHARACTER 1DF00..1DF09 ; N # Ll [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; N # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; N # Ll [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2B ; N # Ll [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW +1DF25..1DF2A ; N # Ll [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E000..1E006 ; N # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; N # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; N # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index c411fc91a..9dc61d95d 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-16.0.0.txt -# Date: 2024-06-05, 22:44:24 GMT +# Date: 2024-05-11, 16:57:19 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3382,7 +3382,7 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1DF00..1DF09 ; AL # Ll [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; AL # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; AL # Ll [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2B ; AL # Ll [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW +1DF25..1DF2A ; AL # Ll [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E000..1E006 ; CM # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; CM # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; CM # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 9b7138c83..443a6d2dd 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,5 +1,5 @@ # Scripts-16.0.0.txt -# Date: 2024-06-05, 22:45:03 GMT +# Date: 2024-04-30, 21:48:40 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -700,9 +700,9 @@ FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1DF00..1DF09 ; Latin # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; Latin # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; Latin # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2B ; Latin # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW +1DF25..1DF2A ; Latin # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -# Total code points: 1488 +# Total code points: 1487 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index d5bf5bba7..64258a373 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -35656,7 +35656,6 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1DF28;LATIN SMALL LETTER R WITH MID-HEIGHT LEFT HOOK;Ll;0;L;;;;;N;;;;; 1DF29;LATIN SMALL LETTER S WITH MID-HEIGHT LEFT HOOK;Ll;0;L;;;;;N;;;;; 1DF2A;LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK;Ll;0;L;;;;;N;;;;; -1DF2B;LATIN SMALL LETTER MEOW;Ll;0;L;;;;;N;;;;; 1E000;COMBINING GLAGOLITIC LETTER AZU;Mn;230;NSM;;;;;N;;;;; 1E001;COMBINING GLAGOLITIC LETTER BUKY;Mn;230;NSM;;;;;N;;;;; 1E002;COMBINING GLAGOLITIC LETTER VEDE;Mn;230;NSM;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 5aafcd6ef..1ebcd7228 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2024-06-05, 22:45:07 GMT +# Date: 2024-04-30, 21:48:42 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2319,7 +2319,7 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 1DF00..1DF09 ; R # Ll [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; R # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; R # Ll [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2B ; R # Ll [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW +1DF25..1DF2A ; R # Ll [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E000..1E006 ; R # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; R # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; R # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 3f75bc98e..39fdb57c0 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-16.0.0.txt -# Date: 2024-06-05, 22:45:04 GMT +# Date: 2024-05-13, 20:53:44 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1316,11 +1316,11 @@ FF41..FF5A ; Lower # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN 1D7CB ; Lower # L& MATHEMATICAL BOLD SMALL DIGAMMA 1DF00..1DF09 ; Lower # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF1E ; Lower # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2B ; Lower # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW +1DF25..1DF2A ; Lower # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E030..1E06D ; Lower # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E922..1E943 ; Lower # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2523 +# Total code points: 2522 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 1c19891cb..925ea3c48 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-16.0.0.txt -# Date: 2024-06-05, 22:45:07 GMT +# Date: 2024-04-30, 21:48:43 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1300,7 +1300,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1DF00..1DF09 ; ALetter # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; ALetter # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; ALetter # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2B ; ALetter # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW +1DF25..1DF2A ; ALetter # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E030..1E06D ; ALetter # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; ALetter # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; ALetter # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -1355,7 +1355,7 @@ FFDA..FFDC ; ALetter # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL 1F150..1F169 ; ALetter # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; ALetter # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 33792 +# Total code points: 33791 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 33137935d..2aceac0aa 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-16.0.0.txt -# Date: 2024-06-05, 22:44:11 GMT +# Date: 2024-04-30, 21:48:13 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1174,7 +1174,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 1DF00..1DF09 ; L # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; L # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; L # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2B ; L # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW +1DF25..1DF2A ; L # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E030..1E06D ; L # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; L # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; L # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -1214,7 +1214,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815350 code points not listed here. +# The above property value applies to 815351 code points not listed here. # Total code points: 1095513 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index 40fd83b91..a5d57af96 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-16.0.0.txt -# Date: 2024-06-05, 22:44:13 GMT +# Date: 2024-04-30, 21:48:15 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1935,7 +1935,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1DF00..1DF09 ; 0 # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; 0 # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; 0 # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2B ; 0 # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW +1DF25..1DF2A ; 0 # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E030..1E06D ; 0 # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; 0 # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; 0 # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -2060,7 +2060,7 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821580 code points not listed here. +# The above property value applies to 821581 code points not listed here. # Total code points: 1113178 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index e5c196450..cc1d91aaa 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2024-06-05, 22:44:17 GMT +# Date: 2024-04-30, 21:48:17 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1964,7 +1964,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1DF00..1DF09 ; N # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; N # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; N # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2B ; N # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW +1DF25..1DF2A ; N # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E000..1E006 ; N # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE 1E008..1E018 ; N # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU 1E01B..1E021 ; N # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI @@ -2103,7 +2103,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761098 code points not listed here. +# The above property value applies to 761099 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 857e5639f..07bf7bca9 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-16.0.0.txt -# Date: 2024-06-05, 22:44:17 GMT +# Date: 2024-04-30, 21:48:17 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -633,7 +633,7 @@ FFFE..FFFF ; Cn # [2] .. 1DAA0 ; Cn # 1DAB0..1DEFF ; Cn # [1104] .. 1DF1F..1DF24 ; Cn # [6] .. -1DF2C..1DFFF ; Cn # [212] .. +1DF2B..1DFFF ; Cn # [213] .. 1E007 ; Cn # 1E019..1E01A ; Cn # [2] .. 1E022 ; Cn # @@ -747,7 +747,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819532 +# Total code points: 819533 # ================================================ @@ -2071,10 +2071,10 @@ FF41..FF5A ; Ll # [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL 1D7CB ; Ll # MATHEMATICAL BOLD SMALL DIGAMMA 1DF00..1DF09 ; Ll # [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0B..1DF1E ; Ll # [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2B ; Ll # [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW +1DF25..1DF2A ; Ll # [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E922..1E943 ; Ll # [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA -# Total code points: 2259 +# Total code points: 2258 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index f0ebf40f8..866536783 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-16.0.0.txt -# Date: 2024-06-05, 22:44:19 GMT +# Date: 2024-05-11, 16:57:14 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757652 code points not listed here. -# Total code points: 895120 +# The above property value applies to 757653 code points not listed here. +# Total code points: 895121 # ================================================ @@ -1521,7 +1521,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1DF00..1DF09 ; AL # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK 1DF0A ; AL # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK 1DF0B..1DF1E ; AL # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL -1DF25..1DF2B ; AL # L& [7] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER MEOW +1DF25..1DF2A ; AL # L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK 1E030..1E06D ; AL # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E100..1E12C ; AL # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W 1E137..1E13D ; AL # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER @@ -1615,7 +1615,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26691 +# Total code points: 26690 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index b26debb18..07b0176b5 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2024-06-05, 22:44:20 GMT +# Date: 2024-04-30, 21:48:18 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -41461,7 +41461,6 @@ FFFD ; REPLACEMENT CHARACTER 1DF28 ; LATIN SMALL LETTER R WITH MID-HEIGHT LEFT HOOK 1DF29 ; LATIN SMALL LETTER S WITH MID-HEIGHT LEFT HOOK 1DF2A ; LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK -1DF2B ; LATIN SMALL LETTER MEOW 1E000 ; COMBINING GLAGOLITIC LETTER AZU 1E001 ; COMBINING GLAGOLITIC LETTER BUKY 1E002 ; COMBINING GLAGOLITIC LETTER VEDE @@ -45368,6 +45367,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 154999 +# Total code points: 154998 # EOF From de80d792ad27f4493de53ef55204073827723188 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 6 Jun 2024 01:03:09 +0200 Subject: [PATCH 22/24] Revert "Revert "mind the gap"" This reverts commit 9bd67946d9ec4798ce5b02697eaab46608c8ba27. --- .../java/org/unicode/text/UCA/PrimariesToFractional.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unicodetools/src/main/java/org/unicode/text/UCA/PrimariesToFractional.java b/unicodetools/src/main/java/org/unicode/text/UCA/PrimariesToFractional.java index 334f35fa6..34ef90337 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCA/PrimariesToFractional.java +++ b/unicodetools/src/main/java/org/unicode/text/UCA/PrimariesToFractional.java @@ -724,11 +724,11 @@ public PrimariesToFractional(UCA uca) { setOptionsForScript(UCD_Types.Vithkuqi).noTwoBytePrimariesIfVariants(); // Extinct script, use three-byte primaries for the few characters with variants. setOptionsForScript(UCD_Types.Elymaic).noTwoBytePrimariesIfVariants(); - // Minor script, avoid lead byte overflow. - setOptionsForScript(UCD_Types.Egyptian_Hieroglyphs).newByte(); + // Large Excluded Script, minimal gaps. + setOptionsForScript(UCD_Types.Egyptian_Hieroglyphs).newByte().minimalGap3(); // Register the scripts as aliases. setOptionsForScripts(UCD_Types.Meroitic_Cursive, UCD_Types.Meroitic_Hieroglyphs); - // Larged Excluded Script, minimal gaps. + // Large Excluded Script, minimal gaps. setOptionsForScripts(UCD_Types.Tangut).minimalGap3(); // Han uses many lead bytes, so that tailoring tens of thousands of characters // can use many two-byte primaries. From 19975d381e116a4b29a40840599e74d23f572701 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 6 Jun 2024 01:03:25 +0200 Subject: [PATCH 23/24] Revert "break the security tests" This reverts commit 75d352c8ca18d19a6fc4179850b724bce704f505. --- .../resources/org/unicode/text/UCD/SecurityInvariantTest.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/SecurityInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/SecurityInvariantTest.txt index e0031ce68..838516704 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/SecurityInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/SecurityInvariantTest.txt @@ -10,7 +10,7 @@ # “Multiple values are not assigned to characters with strong restrictions: # Not_Character, Deprecated, Default_Ignorable, Not_NFKC.” -\p{Identifier_Type=Deprecated} = [\p{Deprecated}a] +\p{Identifier_Type=Deprecated} = \p{Deprecated} # When a code point fits the criteria for multiple restrictions, then the strongest one wins. # For example, Default_Ignorable is trumped by unassigned and Deprecated. From f82a05add10855be918a54b9d3ec40d664baf4a3 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 6 Jun 2024 01:07:36 +0200 Subject: [PATCH 24/24] documentation --- docs/pipeline.md | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/docs/pipeline.md b/docs/pipeline.md index 695f659ec..69f2f4b88 100644 --- a/docs/pipeline.md +++ b/docs/pipeline.md @@ -49,7 +49,7 @@ Indic scripts only: - [ ] Commit --- -- [ ] PropsList.txt — Add Other_Alphabetic, Diacritic, and Extender to satisfy invariants, or to taste +- [ ] PropsList.txt — Add Other_Alphabetic, Other_Lowercase, Diacritic, and Extender to satisfy invariants, or to taste - [ ] Commit --- @@ -67,8 +67,13 @@ PR preparation: - [ ] PR button — Set to DRAFT pull request - unless approved for the upcoming version - [ ] PR button — Press - - The **Check UCA data** CI check might fail; many character additions need separate handling there, - but that is out of scope for the PAG work of preparing `data-for-new`. This will get resolved later. + - The **Check UCA data** and **Check security data invariants** CI checks are + suppressed; many character additions need separate handling there, + but that is out of scope for the PAG work of preparing `data-for-new`, + so reporting those failures could distract from real issues + in the UCD invariants. + UCA and security data issues are addressed later in the process, + before the start of β review. ## Scripts