diff --git a/unicodetools/data/ucd/dev/NormalizationTest.txt b/unicodetools/data/ucd/dev/NormalizationTest.txt index 008613b90..c5a85ed4a 100644 --- a/unicodetools/data/ucd/dev/NormalizationTest.txt +++ b/unicodetools/data/ucd/dev/NormalizationTest.txt @@ -1,5 +1,5 @@ # NormalizationTest-16.0.0.txt -# Date: 2023-11-10, 20:57:38 GMT +# Date: 2024-01-18, 12:57:29 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -66,6 +66,8 @@ 0592 05B7 05BC 05A5 05B0 05C0 05C4 05AD;05B0 05B7 05BC 05A5 0592 05C0 05AD 05C4;05B0 05B7 05BC 05A5 0592 05C0 05AD 05C4;05B0 05B7 05BC 05A5 0592 05C0 05AD 05C4;05B0 05B7 05BC 05A5 0592 05C0 05AD 05C4; # (◌֒◌ַ◌ּ◌֥◌ְ׀◌ׄ◌֭; ◌ְ◌ַ◌ּ◌֥◌֒׀◌֭◌ׄ; ◌ְ◌ַ◌ּ◌֥◌֒׀◌֭◌ׄ; ◌ְ◌ַ◌ּ◌֥◌֒׀◌֭◌ׄ; ◌ְ◌ַ◌ּ◌֥◌֒׀◌֭◌ׄ; ) HEBREW ACCENT SEGOL, HEBREW POINT PATAH, HEBREW POINT DAGESH OR MAPIQ, HEBREW ACCENT MERKHA, HEBREW POINT SHEVA, HEBREW PUNCTUATION PASEQ, HEBREW MARK UPPER DOT, HEBREW ACCENT DEHI 1100 AC00 11A8;1100 AC01;1100 1100 1161 11A8;1100 AC01;1100 1100 1161 11A8; # (ᄀ각; ᄀ각; ᄀ각; ᄀ각; ᄀ각; ) HANGUL CHOSEONG KIYEOK, HANGUL SYLLABLE GA, HANGUL JONGSEONG KIYEOK 1100 AC00 11A8 11A8;1100 AC01 11A8;1100 1100 1161 11A8 11A8;1100 AC01 11A8;1100 1100 1161 11A8 11A8; # (ᄀ각ᆨ; ᄀ각ᆨ; ᄀ각ᆨ; ᄀ각ᆨ; ᄀ각ᆨ; ) HANGUL CHOSEONG KIYEOK, HANGUL SYLLABLE GA, HANGUL JONGSEONG KIYEOK, HANGUL JONGSEONG KIYEOK +01C4 0323;01C4 0323;01C4 0323;0044 1E92 030C;0044 005A 0323 030C; # (DŽ◌̣; DŽ◌̣; DŽ◌̣; DẒ◌̌; DZ◌̣◌̌; ) LATIN CAPITAL LETTER DZ WITH CARON, COMBINING DOT BELOW +0DDD 0334;0DDD 0334;0DD9 0DCF 0334 0DCA;0DDD 0334;0DD9 0DCF 0334 0DCA; # (ෝ◌̴; ෝ◌̴; ො◌̴◌්; ෝ◌̴; ො◌̴◌්; ) SINHALA VOWEL SIGN KOMBUVA HAA DIGA AELA-PILLA, COMBINING TILDE OVERLAY # @Part1 # Character by character test # All characters not explicitly occurring in c1 of Part 1 have identical NFC, D, KC, KD forms. diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/GenerateData.java b/unicodetools/src/main/java/org/unicode/text/UCD/GenerateData.java index 42706826e..c073630fc 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/GenerateData.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/GenerateData.java @@ -1051,6 +1051,13 @@ static final String comma(String s) { "\u0592\u05B7\u05BC\u05A5\u05B0\u05C0\u05C4\u05AD", "\u1100\uAC00\u11A8", "\u1100\uAC00\u11A8\u11A8", + // Some implementations have an edge case when a character whose + // decomposition contains multiple starters and ends with a non-starter + // is followed by a non-starter of lower CCC. + // See https://github.com/unicode-org/unicodetools/issues/656 + // and https://github.com/unicode-org/icu4x/pull/4530. + "\u01C4\u0323", + "\u0DDD\u0334", }; /* static final void backwardsCompat(String directory, String filename, int[] list) throws IOException {