diff --git a/unicodetools/data/ucd/dev/CaseFolding.txt b/unicodetools/data/ucd/dev/CaseFolding.txt index 2f1047b36..d0c2c69af 100644 --- a/unicodetools/data/ucd/dev/CaseFolding.txt +++ b/unicodetools/data/ucd/dev/CaseFolding.txt @@ -930,7 +930,6 @@ 1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI 1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA 1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA -1FD3; S; 0390; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA 1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI 1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI 1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY @@ -939,7 +938,6 @@ 1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA 1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA 1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA -1FE3; S; 03B0; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA 1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI 1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI 1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI @@ -1335,7 +1333,6 @@ FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T -FB05; S; FB06; # LATIN SMALL LIGATURE LONG S T FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 118350283..ec2af4ada 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2023-11-10, 22:25:26 GMT +# Date: 2024-01-10, 15:40:42 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -726,7 +726,7 @@ FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS A 1D78 ; Alphabetic # Lm MODIFIER LETTER CYRILLIC EN 1D79..1D9A ; Alphabetic # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK 1D9B..1DBF ; Alphabetic # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA -1DD3..1DF4 ; Alphabetic # Mn [34] COMBINING LATIN SMALL LETTER FLATTENED OPEN A ABOVE..COMBINING LATIN SMALL LETTER U WITH DIAERESIS +1DE7..1DF4 ; Alphabetic # Mn [14] COMBINING LATIN SMALL LETTER ALPHA..COMBINING LATIN SMALL LETTER U WITH DIAERESIS 1E00..1F15 ; Alphabetic # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA 1F18..1F1D ; Alphabetic # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA 1F20..1F45 ; Alphabetic # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA @@ -1440,7 +1440,7 @@ FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANG 30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Alphabetic # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 138766 +# Total code points: 138746 # ================================================ diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 6225e4c33..44992a259 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-16.0.0.txt -# Date: 2023-11-10, 22:06:29 GMT +# Date: 2024-01-10, 15:41:07 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -637,7 +637,7 @@ FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH L 1C2C..1C33 ; Other_Alphabetic # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T 1C34..1C35 ; Other_Alphabetic # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG 1C36 ; Other_Alphabetic # Mn LEPCHA SIGN RAN -1DD3..1DF4 ; Other_Alphabetic # Mn [34] COMBINING LATIN SMALL LETTER FLATTENED OPEN A ABOVE..COMBINING LATIN SMALL LETTER U WITH DIAERESIS +1DE7..1DF4 ; Other_Alphabetic # Mn [14] COMBINING LATIN SMALL LETTER ALPHA..COMBINING LATIN SMALL LETTER U WITH DIAERESIS 24B6..24E9 ; Other_Alphabetic # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z 2DE0..2DFF ; Other_Alphabetic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS A674..A67B ; Other_Alphabetic # Mn [8] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC LETTER OMEGA @@ -850,7 +850,7 @@ FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA 1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z 1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z -# Total code points: 1495 +# Total code points: 1475 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 4376ff723..865882b95 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -23591,7 +23591,7 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 130DD;EGYPTIAN HIEROGLYPH E010;Lo;0;L;;;;;N;;;;; 130DE;EGYPTIAN HIEROGLYPH E011;Lo;0;L;;;;;N;;;;; 130DF;EGYPTIAN HIEROGLYPH E012;Lo;0;L;;;;;N;;;;; -130E0;EGYPTIAN HIEROGLYPH E013;Lo;0;L;;;;;N;;;;; +130E0;MEOW;Lo;0;L;;;;;N;;;;; 130E1;EGYPTIAN HIEROGLYPH E014;Lo;0;L;;;;;N;;;;; 130E2;EGYPTIAN HIEROGLYPH E015;Lo;0;L;;;;;N;;;;; 130E3;EGYPTIAN HIEROGLYPH E016;Lo;0;L;;;;;N;;;;; diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 145e66ed9..31b320901 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2023-11-10, 22:25:34 GMT +# Date: 2024-01-10, 15:40:50 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -34256,7 +34256,7 @@ FFFD ; REPLACEMENT CHARACTER 130DD ; EGYPTIAN HIEROGLYPH E010 130DE ; EGYPTIAN HIEROGLYPH E011 130DF ; EGYPTIAN HIEROGLYPH E012 -130E0 ; EGYPTIAN HIEROGLYPH E013 +130E0 ; MEOW 130E1 ; EGYPTIAN HIEROGLYPH E014 130E2 ; EGYPTIAN HIEROGLYPH E015 130E3 ; EGYPTIAN HIEROGLYPH E016 diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/GenerateCaseFolding.java b/unicodetools/src/main/java/org/unicode/text/UCD/GenerateCaseFolding.java index 93e05b25d..a63659aff 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/GenerateCaseFolding.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/GenerateCaseFolding.java @@ -245,14 +245,14 @@ static void drawLine( // ΐ → ΐ // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA → // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x1FD3, 0x0390, + // 0x1FD3, 0x0390, // ΰ → ΰ // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA → // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x1FE3, 0x03B0, + // 0x1FE3, 0x03B0, // ſt → st // LATIN SMALL LIGATURE LONG S T → LATIN SMALL LIGATURE ST - 0xFB05, 0xFB06 + // 0xFB05, 0xFB06 }; private static Map getCaseFolding( diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index 9fa0ca3a0..dcb2601a0 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -286,7 +286,7 @@ Let $gcMn_bcL = [\u0CBF\u0CC6\U00011A07\U00011A08\U00011C3F] # Stability: The Case_Folding property value is limited so that no string when case folded expands to more than 3× in length (measured in code units). \p{cf=/..../} = [] # Note: This bound is tight: -\p{cf=/.../} ⊃ [] +\p{cf=/.../} ⊋ [] # Case folding is not the same as lowercasing: Cherokee case folds to uppercase. In \p{sc=Cher} cf = uc