From 4ac8e8451b6b2757b62d8f046faee4b90eb974b2 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 10 Jan 2024 17:33:18 +0100 Subject: [PATCH] Revert "Break everything" This reverts commit 70a4ee2bdb98401b4b35c6d7c882911455199d3c. --- unicodetools/data/ucd/dev/CaseFolding.txt | 3 +++ unicodetools/data/ucd/dev/DerivedCoreProperties.txt | 6 +++--- unicodetools/data/ucd/dev/PropList.txt | 6 +++--- unicodetools/data/ucd/dev/UnicodeData.txt | 2 +- unicodetools/data/ucd/dev/extracted/DerivedName.txt | 4 ++-- .../main/java/org/unicode/text/UCD/GenerateCaseFolding.java | 6 +++--- .../resources/org/unicode/text/UCD/UnicodeInvariantTest.txt | 2 +- 7 files changed, 16 insertions(+), 13 deletions(-) diff --git a/unicodetools/data/ucd/dev/CaseFolding.txt b/unicodetools/data/ucd/dev/CaseFolding.txt index d0c2c69af..2f1047b36 100644 --- a/unicodetools/data/ucd/dev/CaseFolding.txt +++ b/unicodetools/data/ucd/dev/CaseFolding.txt @@ -930,6 +930,7 @@ 1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI 1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA 1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD3; S; 0390; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA 1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI 1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI 1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY @@ -938,6 +939,7 @@ 1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA 1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA 1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA +1FE3; S; 03B0; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA 1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI 1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI 1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI @@ -1333,6 +1335,7 @@ FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T +FB05; S; FB06; # LATIN SMALL LIGATURE LONG S T FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index ec2af4ada..118350283 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2024-01-10, 15:40:42 GMT +# Date: 2023-11-10, 22:25:26 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -726,7 +726,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 1D78 ; Alphabetic # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; Alphabetic # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; Alphabetic # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA -1DE7..1DF4 ; Alphabetic # Mn [14] COMBINING LATIN SMALL LETTER ALPHA..COMBINING LATIN SMALL LETTER U WITH DIAERESIS +1DD3..1DF4 ; Alphabetic # Mn [34] COMBINING LATIN SMALL LETTER FLATTENED OPEN A ABOVE..COMBINING LATIN SMALL LETTER U WITH DIAERESIS 1E00..1F15 ; Alphabetic # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA 1F18..1F1D ; Alphabetic # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA 1F20..1F45 ; Alphabetic # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA @@ -1440,7 +1440,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 138746 +# Total code points: 138766 # ================================================ diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 44992a259..6225e4c33 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-16.0.0.txt -# Date: 2024-01-10, 15:41:07 GMT +# Date: 2023-11-10, 22:06:29 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -637,7 +637,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1C2C..1C33 ; Other_Alphabetic # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T 1C34..1C35 ; Other_Alphabetic # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG 1C36 ; Other_Alphabetic # Mn LEPCHA SIGN RAN -1DE7..1DF4 ; Other_Alphabetic # Mn [14] COMBINING LATIN SMALL LETTER ALPHA..COMBINING LATIN SMALL LETTER U WITH DIAERESIS +1DD3..1DF4 ; Other_Alphabetic # Mn [34] COMBINING LATIN SMALL LETTER FLATTENED OPEN A ABOVE..COMBINING LATIN SMALL LETTER U WITH DIAERESIS 24B6..24E9 ; Other_Alphabetic # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z 2DE0..2DFF ; Other_Alphabetic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS A674..A67B ; Other_Alphabetic # Mn [8] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC LETTER OMEGA @@ -850,7 +850,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1475 +# Total code points: 1495 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 865882b95..4376ff723 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -23591,7 +23591,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 130DD;EGYPTIAN HIEROGLYPH E010;Lo;0;L;;;;;N;;;;; 130DE;EGYPTIAN HIEROGLYPH E011;Lo;0;L;;;;;N;;;;; 130DF;EGYPTIAN HIEROGLYPH E012;Lo;0;L;;;;;N;;;;; -130E0;MEOW;Lo;0;L;;;;;N;;;;; +130E0;EGYPTIAN HIEROGLYPH E013;Lo;0;L;;;;;N;;;;; 130E1;EGYPTIAN HIEROGLYPH E014;Lo;0;L;;;;;N;;;;; 130E2;EGYPTIAN HIEROGLYPH E015;Lo;0;L;;;;;N;;;;; 130E3;EGYPTIAN HIEROGLYPH E016;Lo;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 31b320901..145e66ed9 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2024-01-10, 15:40:50 GMT +# Date: 2023-11-10, 22:25:34 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -34256,7 +34256,7 @@ FFFD ; REPLACEMENT CHARACTER 130DD ; EGYPTIAN HIEROGLYPH E010 130DE ; EGYPTIAN HIEROGLYPH E011 130DF ; EGYPTIAN HIEROGLYPH E012 -130E0 ; MEOW +130E0 ; EGYPTIAN HIEROGLYPH E013 130E1 ; EGYPTIAN HIEROGLYPH E014 130E2 ; EGYPTIAN HIEROGLYPH E015 130E3 ; EGYPTIAN HIEROGLYPH E016 diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/GenerateCaseFolding.java b/unicodetools/src/main/java/org/unicode/text/UCD/GenerateCaseFolding.java index a63659aff..93e05b25d 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/GenerateCaseFolding.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/GenerateCaseFolding.java @@ -245,14 +245,14 @@ static void drawLine( // ΐ → ΐ // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA → // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - // 0x1FD3, 0x0390, + 0x1FD3, 0x0390, // ΰ → ΰ // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA → // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - // 0x1FE3, 0x03B0, + 0x1FE3, 0x03B0, // ſt → st // LATIN SMALL LIGATURE LONG S T → LATIN SMALL LIGATURE ST - // 0xFB05, 0xFB06 + 0xFB05, 0xFB06 }; private static Map getCaseFolding( diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index dcb2601a0..9fa0ca3a0 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -286,7 +286,7 @@ Let $gcMn_bcL = [\u0CBF\u0CC6\U00011A07\U00011A08\U00011C3F] # Stability: The Case_Folding property value is limited so that no string when case folded expands to more than 3× in length (measured in code units). \p{cf=/..../} = [] # Note: This bound is tight: -\p{cf=/.../} ⊋ [] +\p{cf=/.../} ⊃ [] # Case folding is not the same as lowercasing: Cherokee case folds to uppercase. In \p{sc=Cher} cf = uc