diff --git a/unicodetools/data/idna/dev/IdnaTestV2.txt b/unicodetools/data/idna/dev/IdnaTestV2.txt index abcc8277a..fb583ae4d 100644 --- a/unicodetools/data/idna/dev/IdnaTestV2.txt +++ b/unicodetools/data/idna/dev/IdnaTestV2.txt @@ -1,5 +1,5 @@ # IdnaTestV2.txt -# Date: 2024-05-22, 02:31:53 GMT +# Date: 2024-05-22, 22:45:49 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -554,6 +554,11 @@ xn--ASCII-; ascii; [P4]; ; ; ; # ascii ascii; ; ; ; ; ; # ascii xn--unicode-.org; unicode.org; [P4]; ; ; ; # unicode.org unicode.org; ; ; ; ; ; # unicode.org +陋㛼当𤎫竮䗗; 陋㛼当𤎫竮䗗; ; xn--snl253bgitxhzwu2arn60c; ; ; # 陋㛼当𤎫竮䗗 +陋㛼当𤎫竮䗗; ; ; xn--snl253bgitxhzwu2arn60c; ; ; # 陋㛼当𤎫竮䗗 +xn--snl253bgitxhzwu2arn60c; 陋㛼当𤎫竮䗗; ; xn--snl253bgitxhzwu2arn60c; ; ; # 陋㛼当𤎫竮䗗 +電𡍪弳䎫窮䵗; ; ; xn--kbo60w31ob3z6t3av9z5b; ; ; # 電𡍪弳䎫窮䵗 +xn--kbo60w31ob3z6t3av9z5b; 電𡍪弳䎫窮䵗; ; xn--kbo60w31ob3z6t3av9z5b; ; ; # 電𡍪弳䎫窮䵗 # RANDOMIZED TESTS diff --git a/unicodetools/src/main/java/org/unicode/idna/GenerateIdnaTest.java b/unicodetools/src/main/java/org/unicode/idna/GenerateIdnaTest.java index 6fbe881ba..62d299211 100644 --- a/unicodetools/src/main/java/org/unicode/idna/GenerateIdnaTest.java +++ b/unicodetools/src/main/java/org/unicode/idna/GenerateIdnaTest.java @@ -935,5 +935,38 @@ public static UnicodeSet getIdna2008Valid() { "xn---", "xn--ASCII-", "xn--unicode-.org", + // Characters in NormalizationCorrections.txt. + // Especially ones that changed in Unicode 4.0, after IDNA2003 was baked. 
+ // F951;96FB;964B;3.2.0 # Corrigendum 3 + // 2F868;2136A;36FC;4.0.0 # Corrigendum 4 + // 2F874;5F33;5F53;4.0.0 # Corrigendum 4 + // 2F91F;43AB;243AB;4.0.0 # Corrigendum 4 + // 2F95F;7AAE;7AEE;4.0.0 # Corrigendum 4 + // 2F9BF;4D57;45D7;4.0.0 # Corrigendum 4 + // + // source characters + new StringBuilder("\uF951") + .appendCodePoint(0x2F868) + .appendCodePoint(0x2F874) + .appendCodePoint(0x2F91F) + .appendCodePoint(0x2F95F) + .appendCodePoint(0x2F9BF) + .toString(), + // old decompositions + new StringBuilder("\u96FB") + .appendCodePoint(0x2136A) + .appendCodePoint(0x5F33) + .appendCodePoint(0x43AB) + .appendCodePoint(0x7AAE) + .appendCodePoint(0x4D57) + .toString(), + // corrected decompositions + new StringBuilder("\u964B") + .appendCodePoint(0x36FC) + .appendCodePoint(0x5F53) + .appendCodePoint(0x243AB) + .appendCodePoint(0x7AEE) + .appendCodePoint(0x45D7) + .toString(), }; }