From a742327d485112e1a942244f3f5c97dd6691917c Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 3 Jan 2024 17:19:04 +0100 Subject: [PATCH] Split it into its own part and look at chaining compositions, not decompositions --- .../data/ucd/dev/NormalizationTest.txt | 107 ++++++++---------- .../org/unicode/text/UCD/GenerateData.java | 42 ++++--- 2 files changed, 77 insertions(+), 72 deletions(-) diff --git a/unicodetools/data/ucd/dev/NormalizationTest.txt b/unicodetools/data/ucd/dev/NormalizationTest.txt index 4e6d28af5..e217724e9 100644 --- a/unicodetools/data/ucd/dev/NormalizationTest.txt +++ b/unicodetools/data/ucd/dev/NormalizationTest.txt @@ -1,5 +1,5 @@ # NormalizationTest-16.0.0.txt -# Date: 2023-12-29, 18:17:26 GMT +# Date: 2024-01-03, 16:15:25 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -19961,86 +19961,79 @@ FB2A 05BC;05E9 05BC 05C1;05E9 05BC 05C1;05E9 05BC 05C1;05E9 05BC 05C1; # (שׁ◌ FB49 05C1;05E9 05BC 05C1;05E9 05BC 05C1;05E9 05BC 05C1;05E9 05BC 05C1; # (שּ◌ׁ; ש◌ּ◌ׁ; ש◌ּ◌ׁ; ש◌ּ◌ׁ; ש◌ּ◌ׁ; ) HEBREW LETTER SHIN WITH DAGESH, HEBREW POINT SHIN DOT FB2B 05BC;05E9 05BC 05C2;05E9 05BC 05C2;05E9 05BC 05C2;05E9 05BC 05C2; # (שׂ◌ּ; ש◌ּ◌ׂ; ש◌ּ◌ׂ; ש◌ּ◌ׂ; ש◌ּ◌ׂ; ) HEBREW LETTER SHIN WITH SIN DOT, HEBREW POINT DAGESH OR MAPIQ FB49 05C2;05E9 05BC 05C2;05E9 05BC 05C2;05E9 05BC 05C2;05E9 05BC 05C2; # (שּ◌ׂ; ש◌ּ◌ׂ; ש◌ּ◌ׂ; ש◌ּ◌ׂ; ש◌ּ◌ׂ; ) HEBREW LETTER SHIN WITH DAGESH, HEBREW POINT SIN DOT -1E4E 0301;1E4E 0301;004F 0303 0308 0301;1E4E 0301;004F 0303 0308 0301; # (Ṏ◌́; Ṏ◌́; O◌̃◌̈◌́; Ṏ◌́; O◌̃◌̈◌́; ) LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS, COMBINING ACUTE ACCENT -1E4E 0341;1E4E 0301;004F 0303 0308 0301;1E4E 0301;004F 0303 0308 0301; # (Ṏ◌́; Ṏ◌́; O◌̃◌̈◌́; Ṏ◌́; O◌̃◌̈◌́; ) LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS, COMBINING ACUTE TONE MARK -1E4F 0301;1E4F 0301;006F 0303 0308 0301;1E4F 0301;006F 0303 0308 0301; # (ṏ◌́; ṏ◌́; o◌̃◌̈◌́; ṏ◌́; o◌̃◌̈◌́; ) LATIN SMALL LETTER O WITH TILDE AND DIAERESIS, COMBINING ACUTE ACCENT -1E4F 0341;1E4F 0301;006F 0303 0308 0301;1E4F 0301;006F 0303 0308 0301; # (ṏ◌́; ṏ◌́; o◌̃◌̈◌́; ṏ◌́; o◌̃◌̈◌́; ) LATIN SMALL LETTER O WITH TILDE AND DIAERESIS, COMBINING ACUTE TONE MARK -00D5 0344 031B;1EE0 0308 0301;004F 031B 0303 0308 0301;1EE0 0308 0301;004F 031B 0303 0308 0301; # (Õ◌̈́◌̛; Ỡ◌̈◌́; O◌̛◌̃◌̈◌́; Ỡ◌̈◌́; O◌̛◌̃◌̈◌́; ) LATIN CAPITAL LETTER O WITH TILDE, COMBINING GREEK DIALYTIKA TONOS, COMBINING HORN -1E4E 0301 031B;1EE0 0308 0301;004F 031B 0303 0308 0301;1EE0 0308 0301;004F 031B 0303 0308 0301; # (Ṏ◌́◌̛; Ỡ◌̈◌́; O◌̛◌̃◌̈◌́; Ỡ◌̈◌́; O◌̛◌̃◌̈◌́; ) LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS, COMBINING ACUTE ACCENT, COMBINING HORN -1E4E 031B 0301;1EE0 0308 0301;004F 031B 0303 0308 0301;1EE0 0308 0301;004F 031B 0303 0308 0301; # (Ṏ◌̛◌́; Ỡ◌̈◌́; O◌̛◌̃◌̈◌́; Ỡ◌̈◌́; O◌̛◌̃◌̈◌́; ) LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS, COMBINING HORN, COMBINING ACUTE ACCENT -1E4E 031B 0341;1EE0 0308 0301;004F 031B 0303 0308 0301;1EE0 0308 0301;004F 031B 0303 0308 0301; # (Ṏ◌̛◌́; Ỡ◌̈◌́; O◌̛◌̃◌̈◌́; Ỡ◌̈◌́; O◌̛◌̃◌̈◌́; ) LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS, COMBINING HORN, COMBINING ACUTE TONE MARK -1E4E 0341 031B;1EE0 0308 0301;004F 031B 0303 0308 0301;1EE0 0308 0301;004F 031B 0303 0308 0301; # (Ṏ◌́◌̛; Ỡ◌̈◌́; O◌̛◌̃◌̈◌́; Ỡ◌̈◌́; O◌̛◌̃◌̈◌́; ) LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS, COMBINING ACUTE TONE MARK, COMBINING HORN -004F 0303 0344 031B;1EE0 0308 0301;004F 031B 0303 0308 0301;1EE0 0308 0301;004F 031B 0303 0308 0301; # (O◌̃◌̈́◌̛; Ỡ◌̈◌́; O◌̛◌̃◌̈◌́; Ỡ◌̈◌́; O◌̛◌̃◌̈◌́; ) LATIN CAPITAL LETTER O, COMBINING TILDE, COMBINING GREEK DIALYTIKA TONOS, COMBINING HORN -00D5 0308 0301 031B;1EE0 0308 0301;004F 031B 0303 0308 0301;1EE0 0308 0301;004F 031B 0303 0308 0301; # (Õ◌̈◌́◌̛; Ỡ◌̈◌́; O◌̛◌̃◌̈◌́; Ỡ◌̈◌́; O◌̛◌̃◌̈◌́; ) LATIN CAPITAL LETTER O WITH TILDE, COMBINING DIAERESIS, COMBINING ACUTE ACCENT, COMBINING HORN -00D5 0308 031B 0301;1EE0 0308 0301;004F 031B 0303 0308 0301;1EE0 0308 0301;004F 031B 0303 0308 0301; # (Õ◌̈◌̛◌́; Ỡ◌̈◌́; O◌̛◌̃◌̈◌́; Ỡ◌̈◌́; O◌̛◌̃◌̈◌́; ) LATIN CAPITAL LETTER O WITH TILDE, COMBINING DIAERESIS, COMBINING HORN, COMBINING ACUTE ACCENT -00D5 0308 031B 0341;1EE0 0308 0301;004F 031B 0303 0308 0301;1EE0 0308 0301;004F 031B 0303 0308 0301; # (Õ◌̈◌̛◌́; Ỡ◌̈◌́; O◌̛◌̃◌̈◌́; Ỡ◌̈◌́; O◌̛◌̃◌̈◌́; ) LATIN CAPITAL LETTER O WITH TILDE, COMBINING DIAERESIS, COMBINING HORN, COMBINING ACUTE TONE MARK -00D5 0308 0341 031B;1EE0 0308 0301;004F 031B 0303 0308 0301;1EE0 0308 0301;004F 031B 0303 0308 0301; # (Õ◌̈◌́◌̛; Ỡ◌̈◌́; O◌̛◌̃◌̈◌́; Ỡ◌̈◌́; O◌̛◌̃◌̈◌́; ) LATIN CAPITAL LETTER O WITH TILDE, COMBINING DIAERESIS, COMBINING ACUTE TONE MARK, COMBINING HORN -00F5 0344 031B;1EE1 0308 0301;006F 031B 0303 0308 0301;1EE1 0308 0301;006F 031B 0303 0308 0301; # (õ◌̈́◌̛; ỡ◌̈◌́; o◌̛◌̃◌̈◌́; ỡ◌̈◌́; o◌̛◌̃◌̈◌́; ) LATIN SMALL LETTER O WITH TILDE, COMBINING GREEK DIALYTIKA TONOS, COMBINING HORN -1E4F 0301 031B;1EE1 0308 0301;006F 031B 0303 0308 0301;1EE1 0308 0301;006F 031B 0303 0308 0301; # (ṏ◌́◌̛; ỡ◌̈◌́; o◌̛◌̃◌̈◌́; ỡ◌̈◌́; o◌̛◌̃◌̈◌́; ) LATIN SMALL LETTER O WITH TILDE AND DIAERESIS, COMBINING ACUTE ACCENT, COMBINING HORN -1E4F 031B 0301;1EE1 0308 0301;006F 031B 0303 0308 0301;1EE1 0308 0301;006F 031B 0303 0308 0301; # (ṏ◌̛◌́; ỡ◌̈◌́; o◌̛◌̃◌̈◌́; ỡ◌̈◌́; o◌̛◌̃◌̈◌́; ) LATIN SMALL LETTER O WITH TILDE AND DIAERESIS, COMBINING HORN, COMBINING ACUTE ACCENT -1E4F 031B 0341;1EE1 0308 0301;006F 031B 0303 0308 0301;1EE1 0308 0301;006F 031B 0303 0308 0301; # (ṏ◌̛◌́; ỡ◌̈◌́; o◌̛◌̃◌̈◌́; ỡ◌̈◌́; o◌̛◌̃◌̈◌́; ) LATIN SMALL LETTER O WITH TILDE AND DIAERESIS, COMBINING HORN, COMBINING ACUTE TONE MARK -1E4F 0341 031B;1EE1 0308 0301;006F 031B 0303 0308 0301;1EE1 0308 0301;006F 031B 0303 0308 0301; # (ṏ◌́◌̛; ỡ◌̈◌́; o◌̛◌̃◌̈◌́; ỡ◌̈◌́; o◌̛◌̃◌̈◌́; ) LATIN SMALL LETTER O WITH TILDE AND DIAERESIS, COMBINING ACUTE TONE MARK, COMBINING HORN -006F 0303 0344 031B;1EE1 0308 0301;006F 031B 0303 0308 0301;1EE1 0308 0301;006F 031B 0303 0308 0301; # (o◌̃◌̈́◌̛; ỡ◌̈◌́; o◌̛◌̃◌̈◌́; ỡ◌̈◌́; o◌̛◌̃◌̈◌́; ) LATIN SMALL LETTER O, COMBINING TILDE, COMBINING GREEK DIALYTIKA TONOS, COMBINING HORN -00F5 0308 0301 031B;1EE1 0308 0301;006F 031B 0303 0308 0301;1EE1 0308 0301;006F 031B 0303 0308 0301; # (õ◌̈◌́◌̛; ỡ◌̈◌́; o◌̛◌̃◌̈◌́; ỡ◌̈◌́; o◌̛◌̃◌̈◌́; ) LATIN SMALL LETTER O WITH TILDE, COMBINING DIAERESIS, COMBINING ACUTE ACCENT, COMBINING HORN -00F5 0308 031B 0301;1EE1 0308 0301;006F 031B 0303 0308 0301;1EE1 0308 0301;006F 031B 0303 0308 0301; # (õ◌̈◌̛◌́; ỡ◌̈◌́; o◌̛◌̃◌̈◌́; ỡ◌̈◌́; o◌̛◌̃◌̈◌́; ) LATIN SMALL LETTER O WITH TILDE, COMBINING DIAERESIS, COMBINING HORN, COMBINING ACUTE ACCENT -00F5 0308 031B 0341;1EE1 0308 0301;006F 031B 0303 0308 0301;1EE1 0308 0301;006F 031B 0303 0308 0301; # (õ◌̈◌̛◌́; ỡ◌̈◌́; o◌̛◌̃◌̈◌́; ỡ◌̈◌́; o◌̛◌̃◌̈◌́; ) LATIN SMALL LETTER O WITH TILDE, COMBINING DIAERESIS, COMBINING HORN, COMBINING ACUTE TONE MARK -00F5 0308 0341 031B;1EE1 0308 0301;006F 031B 0303 0308 0301;1EE1 0308 0301;006F 031B 0303 0308 0301; # (õ◌̈◌́◌̛; ỡ◌̈◌́; o◌̛◌̃◌̈◌́; ỡ◌̈◌́; o◌̛◌̃◌̈◌́; ) LATIN SMALL LETTER O WITH TILDE, COMBINING DIAERESIS, COMBINING ACUTE TONE MARK, COMBINING HORN -0168 0344 031B;1EEE 0308 0301;0055 031B 0303 0308 0301;1EEE 0308 0301;0055 031B 0303 0308 0301; # (Ũ◌̈́◌̛; Ữ◌̈◌́; U◌̛◌̃◌̈◌́; Ữ◌̈◌́; U◌̛◌̃◌̈◌́; ) LATIN CAPITAL LETTER U WITH TILDE, COMBINING GREEK DIALYTIKA TONOS, COMBINING HORN -0055 0303 0344 031B;1EEE 0308 0301;0055 031B 0303 0308 0301;1EEE 0308 0301;0055 031B 0303 0308 0301; # (U◌̃◌̈́◌̛; Ữ◌̈◌́; U◌̛◌̃◌̈◌́; Ữ◌̈◌́; U◌̛◌̃◌̈◌́; ) LATIN CAPITAL LETTER U, COMBINING TILDE, COMBINING GREEK DIALYTIKA TONOS, COMBINING HORN -0168 0308 0301 031B;1EEE 0308 0301;0055 031B 0303 0308 0301;1EEE 0308 0301;0055 031B 0303 0308 0301; # (Ũ◌̈◌́◌̛; Ữ◌̈◌́; U◌̛◌̃◌̈◌́; Ữ◌̈◌́; U◌̛◌̃◌̈◌́; ) LATIN CAPITAL LETTER U WITH TILDE, COMBINING DIAERESIS, COMBINING ACUTE ACCENT, COMBINING HORN -0168 0308 031B 0301;1EEE 0308 0301;0055 031B 0303 0308 0301;1EEE 0308 0301;0055 031B 0303 0308 0301; # (Ũ◌̈◌̛◌́; Ữ◌̈◌́; U◌̛◌̃◌̈◌́; Ữ◌̈◌́; U◌̛◌̃◌̈◌́; ) LATIN CAPITAL LETTER U WITH TILDE, COMBINING DIAERESIS, COMBINING HORN, COMBINING ACUTE ACCENT -0168 0308 031B 0341;1EEE 0308 0301;0055 031B 0303 0308 0301;1EEE 0308 0301;0055 031B 0303 0308 0301; # (Ũ◌̈◌̛◌́; Ữ◌̈◌́; U◌̛◌̃◌̈◌́; Ữ◌̈◌́; U◌̛◌̃◌̈◌́; ) LATIN CAPITAL LETTER U WITH TILDE, COMBINING DIAERESIS, COMBINING HORN, COMBINING ACUTE TONE MARK -0168 0308 0341 031B;1EEE 0308 0301;0055 031B 0303 0308 0301;1EEE 0308 0301;0055 031B 0303 0308 0301; # (Ũ◌̈◌́◌̛; Ữ◌̈◌́; U◌̛◌̃◌̈◌́; Ữ◌̈◌́; U◌̛◌̃◌̈◌́; ) LATIN CAPITAL LETTER U WITH TILDE, COMBINING DIAERESIS, COMBINING ACUTE TONE MARK, COMBINING HORN -0169 0344 031B;1EEF 0308 0301;0075 031B 0303 0308 0301;1EEF 0308 0301;0075 031B 0303 0308 0301; # (ũ◌̈́◌̛; ữ◌̈◌́; u◌̛◌̃◌̈◌́; ữ◌̈◌́; u◌̛◌̃◌̈◌́; ) LATIN SMALL LETTER U WITH TILDE, COMBINING GREEK DIALYTIKA TONOS, COMBINING HORN -0075 0303 0344 031B;1EEF 0308 0301;0075 031B 0303 0308 0301;1EEF 0308 0301;0075 031B 0303 0308 0301; # (u◌̃◌̈́◌̛; ữ◌̈◌́; u◌̛◌̃◌̈◌́; ữ◌̈◌́; u◌̛◌̃◌̈◌́; ) LATIN SMALL LETTER U, COMBINING TILDE, COMBINING GREEK DIALYTIKA TONOS, COMBINING HORN -0169 0308 0301 031B;1EEF 0308 0301;0075 031B 0303 0308 0301;1EEF 0308 0301;0075 031B 0303 0308 0301; # (ũ◌̈◌́◌̛; ữ◌̈◌́; u◌̛◌̃◌̈◌́; ữ◌̈◌́; u◌̛◌̃◌̈◌́; ) LATIN SMALL LETTER U WITH TILDE, COMBINING DIAERESIS, COMBINING ACUTE ACCENT, COMBINING HORN -0169 0308 031B 0301;1EEF 0308 0301;0075 031B 0303 0308 0301;1EEF 0308 0301;0075 031B 0303 0308 0301; # (ũ◌̈◌̛◌́; ữ◌̈◌́; u◌̛◌̃◌̈◌́; ữ◌̈◌́; u◌̛◌̃◌̈◌́; ) LATIN SMALL LETTER U WITH TILDE, COMBINING DIAERESIS, COMBINING HORN, COMBINING ACUTE ACCENT -0169 0308 031B 0341;1EEF 0308 0301;0075 031B 0303 0308 0301;1EEF 0308 0301;0075 031B 0303 0308 0301; # (ũ◌̈◌̛◌́; ữ◌̈◌́; u◌̛◌̃◌̈◌́; ữ◌̈◌́; u◌̛◌̃◌̈◌́; ) LATIN SMALL LETTER U WITH TILDE, COMBINING DIAERESIS, COMBINING HORN, COMBINING ACUTE TONE MARK -0169 0308 0341 031B;1EEF 0308 0301;0075 031B 0303 0308 0301;1EEF 0308 0301;0075 031B 0303 0308 0301; # (ũ◌̈◌́◌̛; ữ◌̈◌́; u◌̛◌̃◌̈◌́; ữ◌̈◌́; u◌̛◌̃◌̈◌́; ) LATIN SMALL LETTER U WITH TILDE, COMBINING DIAERESIS, COMBINING ACUTE TONE MARK, COMBINING HORN -1E7A 0301;1E7A 0301;0055 0304 0308 0301;1E7A 0301;0055 0304 0308 0301; # (Ṻ◌́; Ṻ◌́; U◌̄◌̈◌́; Ṻ◌́; U◌̄◌̈◌́; ) LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS, COMBINING ACUTE ACCENT -1E7A 0341;1E7A 0301;0055 0304 0308 0301;1E7A 0301;0055 0304 0308 0301; # (Ṻ◌́; Ṻ◌́; U◌̄◌̈◌́; Ṻ◌́; U◌̄◌̈◌́; ) LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS, COMBINING ACUTE TONE MARK -1E7B 0301;1E7B 0301;0075 0304 0308 0301;1E7B 0301;0075 0304 0308 0301; # (ṻ◌́; ṻ◌́; u◌̄◌̈◌́; ṻ◌́; u◌̄◌̈◌́; ) LATIN SMALL LETTER U WITH MACRON AND DIAERESIS, COMBINING ACUTE ACCENT -1E7B 0341;1E7B 0301;0075 0304 0308 0301;1E7B 0301;0075 0304 0308 0301; # (ṻ◌́; ṻ◌́; u◌̄◌̈◌́; ṻ◌́; u◌̄◌̈◌́; ) LATIN SMALL LETTER U WITH MACRON AND DIAERESIS, COMBINING ACUTE TONE MARK -014C 0344 0328;01EC 0308 0301;004F 0328 0304 0308 0301;01EC 0308 0301;004F 0328 0304 0308 0301; # (Ō◌̈́◌̨; Ǭ◌̈◌́; O◌̨◌̄◌̈◌́; Ǭ◌̈◌́; O◌̨◌̄◌̈◌́; ) LATIN CAPITAL LETTER O WITH MACRON, COMBINING GREEK DIALYTIKA TONOS, COMBINING OGONEK -004F 0304 0344 0328;01EC 0308 0301;004F 0328 0304 0308 0301;01EC 0308 0301;004F 0328 0304 0308 0301; # (O◌̄◌̈́◌̨; Ǭ◌̈◌́; O◌̨◌̄◌̈◌́; Ǭ◌̈◌́; O◌̨◌̄◌̈◌́; ) LATIN CAPITAL LETTER O, COMBINING MACRON, COMBINING GREEK DIALYTIKA TONOS, COMBINING OGONEK -014C 0308 0301 0328;01EC 0308 0301;004F 0328 0304 0308 0301;01EC 0308 0301;004F 0328 0304 0308 0301; # (Ō◌̈◌́◌̨; Ǭ◌̈◌́; O◌̨◌̄◌̈◌́; Ǭ◌̈◌́; O◌̨◌̄◌̈◌́; ) LATIN CAPITAL LETTER O WITH MACRON, COMBINING DIAERESIS, COMBINING ACUTE ACCENT, COMBINING OGONEK -014C 0308 0328 0301;01EC 0308 0301;004F 0328 0304 0308 0301;01EC 0308 0301;004F 0328 0304 0308 0301; # (Ō◌̈◌̨◌́; Ǭ◌̈◌́; O◌̨◌̄◌̈◌́; Ǭ◌̈◌́; O◌̨◌̄◌̈◌́; ) LATIN CAPITAL LETTER O WITH MACRON, COMBINING DIAERESIS, COMBINING OGONEK, COMBINING ACUTE ACCENT -014C 0308 0328 0341;01EC 0308 0301;004F 0328 0304 0308 0301;01EC 0308 0301;004F 0328 0304 0308 0301; # (Ō◌̈◌̨◌́; Ǭ◌̈◌́; O◌̨◌̄◌̈◌́; Ǭ◌̈◌́; O◌̨◌̄◌̈◌́; ) LATIN CAPITAL LETTER O WITH MACRON, COMBINING DIAERESIS, COMBINING OGONEK, COMBINING ACUTE TONE MARK -014C 0308 0341 0328;01EC 0308 0301;004F 0328 0304 0308 0301;01EC 0308 0301;004F 0328 0304 0308 0301; # (Ō◌̈◌́◌̨; Ǭ◌̈◌́; O◌̨◌̄◌̈◌́; Ǭ◌̈◌́; O◌̨◌̄◌̈◌́; ) LATIN CAPITAL LETTER O WITH MACRON, COMBINING DIAERESIS, COMBINING ACUTE TONE MARK, COMBINING OGONEK -014D 0344 0328;01ED 0308 0301;006F 0328 0304 0308 0301;01ED 0308 0301;006F 0328 0304 0308 0301; # (ō◌̈́◌̨; ǭ◌̈◌́; o◌̨◌̄◌̈◌́; ǭ◌̈◌́; o◌̨◌̄◌̈◌́; ) LATIN SMALL LETTER O WITH MACRON, COMBINING GREEK DIALYTIKA TONOS, COMBINING OGONEK -006F 0304 0344 0328;01ED 0308 0301;006F 0328 0304 0308 0301;01ED 0308 0301;006F 0328 0304 0308 0301; # (o◌̄◌̈́◌̨; ǭ◌̈◌́; o◌̨◌̄◌̈◌́; ǭ◌̈◌́; o◌̨◌̄◌̈◌́; ) LATIN SMALL LETTER O, COMBINING MACRON, COMBINING GREEK DIALYTIKA TONOS, COMBINING OGONEK -014D 0308 0301 0328;01ED 0308 0301;006F 0328 0304 0308 0301;01ED 0308 0301;006F 0328 0304 0308 0301; # (ō◌̈◌́◌̨; ǭ◌̈◌́; o◌̨◌̄◌̈◌́; ǭ◌̈◌́; o◌̨◌̄◌̈◌́; ) LATIN SMALL LETTER O WITH MACRON, COMBINING DIAERESIS, COMBINING ACUTE ACCENT, COMBINING OGONEK -014D 0308 0328 0301;01ED 0308 0301;006F 0328 0304 0308 0301;01ED 0308 0301;006F 0328 0304 0308 0301; # (ō◌̈◌̨◌́; ǭ◌̈◌́; o◌̨◌̄◌̈◌́; ǭ◌̈◌́; o◌̨◌̄◌̈◌́; ) LATIN SMALL LETTER O WITH MACRON, COMBINING DIAERESIS, COMBINING OGONEK, COMBINING ACUTE ACCENT -014D 0308 0328 0341;01ED 0308 0301;006F 0328 0304 0308 0301;01ED 0308 0301;006F 0328 0304 0308 0301; # (ō◌̈◌̨◌́; ǭ◌̈◌́; o◌̨◌̄◌̈◌́; ǭ◌̈◌́; o◌̨◌̄◌̈◌́; ) LATIN SMALL LETTER O WITH MACRON, COMBINING DIAERESIS, COMBINING OGONEK, COMBINING ACUTE TONE MARK -014D 0308 0341 0328;01ED 0308 0301;006F 0328 0304 0308 0301;01ED 0308 0301;006F 0328 0304 0308 0301; # (ō◌̈◌́◌̨; ǭ◌̈◌́; o◌̨◌̄◌̈◌́; ǭ◌̈◌́; o◌̨◌̄◌̈◌́; ) LATIN SMALL LETTER O WITH MACRON, COMBINING DIAERESIS, COMBINING ACUTE TONE MARK, COMBINING OGONEK -004C 0304 0344 0323;1E38 0308 0301;004C 0323 0304 0308 0301;1E38 0308 0301;004C 0323 0304 0308 0301; # (L◌̄◌̈́◌̣; Ḹ◌̈◌́; L◌̣◌̄◌̈◌́; Ḹ◌̈◌́; L◌̣◌̄◌̈◌́; ) LATIN CAPITAL LETTER L, COMBINING MACRON, COMBINING GREEK DIALYTIKA TONOS, COMBINING DOT BELOW -006C 0304 0344 0323;1E39 0308 0301;006C 0323 0304 0308 0301;1E39 0308 0301;006C 0323 0304 0308 0301; # (l◌̄◌̈́◌̣; ḹ◌̈◌́; l◌̣◌̄◌̈◌́; ḹ◌̈◌́; l◌̣◌̄◌̈◌́; ) LATIN SMALL LETTER L, COMBINING MACRON, COMBINING GREEK DIALYTIKA TONOS, COMBINING DOT BELOW -0052 0304 0344 0323;1E5C 0308 0301;0052 0323 0304 0308 0301;1E5C 0308 0301;0052 0323 0304 0308 0301; # (R◌̄◌̈́◌̣; Ṝ◌̈◌́; R◌̣◌̄◌̈◌́; Ṝ◌̈◌́; R◌̣◌̄◌̈◌́; ) LATIN CAPITAL LETTER R, COMBINING MACRON, COMBINING GREEK DIALYTIKA TONOS, COMBINING DOT BELOW -0072 0304 0344 0323;1E5D 0308 0301;0072 0323 0304 0308 0301;1E5D 0308 0301;0072 0323 0304 0308 0301; # (r◌̄◌̈́◌̣; ṝ◌̈◌́; r◌̣◌̄◌̈◌́; ṝ◌̈◌́; r◌̣◌̄◌̈◌́; ) LATIN SMALL LETTER R, COMBINING MACRON, COMBINING GREEK DIALYTIKA TONOS, COMBINING DOT BELOW +# +@Part5 # Chained compositions +# +# Linking on TULU-TIGALARI VOWEL SIGN EE+TULU-TIGALARI VOWEL SIGN EE +# TULU-TIGALARI LETTER EE, TULU-TIGALARI VOWEL SIGN EE+TULU-TIGALARI VOWEL SIGN EE, TULU-TIGALARI VOWEL SIGN EE +# Link in TULU-TIGALARI VOWEL SIGN AI 1138B 113C5 113C2;1138E 113C5;1138B 113C2 113C2 113C2;1138E 113C5;1138B 113C2 113C2 113C2; # (𑎎𑏅; 𑎎𑏅; 𑎎𑏅; 𑎎𑏅; 𑎎𑏅; ) TULU-TIGALARI LETTER EE, TULU-TIGALARI VOWEL SIGN AI, TULU-TIGALARI VOWEL SIGN EE +# TULU-TIGALARI LETTER EE, TULU-TIGALARI VOWEL SIGN EE+TULU-TIGALARI VOWEL SIGN EE, TULU-TIGALARI VOWEL SIGN AA +# Link in TULU-TIGALARI VOWEL SIGN AI 1138B 113C5 113B8;1138E 113C7;1138B 113C2 113C2 113B8;1138E 113C7;1138B 113C2 113C2 113B8; # (𑎎𑏇; 𑎎𑏇; 𑎎𑏇; 𑎎𑏇; 𑎎𑏇; ) TULU-TIGALARI LETTER EE, TULU-TIGALARI VOWEL SIGN AI, TULU-TIGALARI VOWEL SIGN AA +# TULU-TIGALARI LETTER EE, TULU-TIGALARI VOWEL SIGN EE+TULU-TIGALARI VOWEL SIGN EE, TULU-TIGALARI AU LENGTH MARK +# Link in TULU-TIGALARI VOWEL SIGN AI 1138B 113C5 113C9;1138E 113C8;1138B 113C2 113C2 113C9;1138E 113C8;1138B 113C2 113C2 113C9; # (𑎎𑏈; 𑎎𑏈; 𑎎𑏈; 𑎎𑏈; 𑎎𑏈; ) TULU-TIGALARI LETTER EE, TULU-TIGALARI VOWEL SIGN AI, TULU-TIGALARI AU LENGTH MARK +# TULU-TIGALARI VOWEL SIGN EE, TULU-TIGALARI VOWEL SIGN EE+TULU-TIGALARI VOWEL SIGN EE, TULU-TIGALARI VOWEL SIGN EE +# Link in TULU-TIGALARI VOWEL SIGN AI 113C2 113C5 113C2;113C5 113C5;113C2 113C2 113C2 113C2;113C5 113C5;113C2 113C2 113C2 113C2; # (𑏅𑏅; 𑏅𑏅; 𑏅𑏅; 𑏅𑏅; 𑏅𑏅; ) TULU-TIGALARI VOWEL SIGN EE, TULU-TIGALARI VOWEL SIGN AI, TULU-TIGALARI VOWEL SIGN EE +# TULU-TIGALARI VOWEL SIGN EE, TULU-TIGALARI VOWEL SIGN EE+TULU-TIGALARI VOWEL SIGN EE, TULU-TIGALARI VOWEL SIGN AA +# Link in TULU-TIGALARI VOWEL SIGN AI 113C2 113C5 113B8;113C5 113C7;113C2 113C2 113C2 113B8;113C5 113C7;113C2 113C2 113C2 113B8; # (𑏅𑏇; 𑏅𑏇; 𑏅𑏇; 𑏅𑏇; 𑏅𑏇; ) TULU-TIGALARI VOWEL SIGN EE, TULU-TIGALARI VOWEL SIGN AI, TULU-TIGALARI VOWEL SIGN AA +# TULU-TIGALARI VOWEL SIGN EE, TULU-TIGALARI VOWEL SIGN EE+TULU-TIGALARI VOWEL SIGN EE, TULU-TIGALARI AU LENGTH MARK +# Link in TULU-TIGALARI VOWEL SIGN AI 113C2 113C5 113C9;113C5 113C8;113C2 113C2 113C2 113C9;113C5 113C8;113C2 113C2 113C2 113C9; # (𑏅𑏈; 𑏅𑏈; 𑏅𑏈; 𑏅𑏈; 𑏅𑏈; ) TULU-TIGALARI VOWEL SIGN EE, TULU-TIGALARI VOWEL SIGN AI, TULU-TIGALARI AU LENGTH MARK +# Linking on GURUNG KHEMA VOWEL SIGN AA+GURUNG KHEMA VOWEL SIGN AA +# GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN AA+GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN AA +# Link in GURUNG KHEMA VOWEL SIGN U 1611E 16121 1611E;16121 16121;1611E 1611E 1611E 1611E;16121 16121;1611E 1611E 1611E 1611E; # (◌𖄞◌𖄡◌𖄞; ◌𖄡◌𖄡; ◌𖄞◌𖄞◌𖄞◌𖄞; ◌𖄡◌𖄡; ◌𖄞◌𖄞◌𖄞◌𖄞; ) GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN U, GURUNG KHEMA VOWEL SIGN AA +# GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN AA+GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL LENGTH MARK +# Link in GURUNG KHEMA VOWEL SIGN U 1611E 16121 16129;16121 16122;1611E 1611E 1611E 16129;16121 16122;1611E 1611E 1611E 16129; # (◌𖄞◌𖄡◌𖄩; ◌𖄡◌𖄢; ◌𖄞◌𖄞◌𖄞◌𖄩; ◌𖄡◌𖄢; ◌𖄞◌𖄞◌𖄞◌𖄩; ) GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN U, GURUNG KHEMA VOWEL LENGTH MARK +# GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN AA+GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN I +# Link in GURUNG KHEMA VOWEL SIGN O 1611E 16126;16121 16123;1611E 1611E 1611E 1611F;16121 16123;1611E 1611E 1611E 1611F; # (◌𖄞◌𖄦; ◌𖄡◌𖄣; ◌𖄞◌𖄞◌𖄞◌𖄟; ◌𖄡◌𖄣; ◌𖄞◌𖄞◌𖄞◌𖄟; ) GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN O +# Link in GURUNG KHEMA VOWEL SIGN U 1611E 16121 1611F;16121 16123;1611E 1611E 1611E 1611F;16121 16123;1611E 1611E 1611E 1611F; # (◌𖄞◌𖄡◌𖄟; ◌𖄡◌𖄣; ◌𖄞◌𖄞◌𖄞◌𖄟; ◌𖄡◌𖄣; ◌𖄞◌𖄞◌𖄞◌𖄟; ) GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN U, GURUNG KHEMA VOWEL SIGN I +# GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN AA+GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN II +# Link in GURUNG KHEMA VOWEL SIGN AU 1611E 16128;16121 16125;1611E 1611E 1611E 16120;16121 16125;1611E 1611E 1611E 16120; # (◌𖄞◌𖄨; ◌𖄡◌𖄥; ◌𖄞◌𖄞◌𖄞◌𖄠; ◌𖄡◌𖄥; ◌𖄞◌𖄞◌𖄞◌𖄠; ) GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN AU +# Link in GURUNG KHEMA VOWEL SIGN U 1611E 16121 16120;16121 16125;1611E 1611E 1611E 16120;16121 16125;1611E 1611E 1611E 16120; # (◌𖄞◌𖄡◌𖄠; ◌𖄡◌𖄥; ◌𖄞◌𖄞◌𖄞◌𖄠; ◌𖄡◌𖄥; ◌𖄞◌𖄞◌𖄞◌𖄠; ) GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN U, GURUNG KHEMA VOWEL SIGN II +# GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN AA+GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN I +# Link in GURUNG KHEMA VOWEL SIGN U 1611E 16121 16123;16121 16126;1611E 1611E 1611E 1611E 1611F;16121 16126;1611E 1611E 1611E 1611E 1611F; # (◌𖄞◌𖄡◌𖄣; ◌𖄡◌𖄦; ◌𖄞◌𖄞◌𖄞◌𖄞◌𖄟; ◌𖄡◌𖄦; ◌𖄞◌𖄞◌𖄞◌𖄞◌𖄟; ) GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN U, GURUNG KHEMA VOWEL SIGN E +# Link in GURUNG KHEMA VOWEL SIGN U 1611E 16121 1611E 1611F;16121 16126;1611E 1611E 1611E 1611E 1611F;16121 16126;1611E 1611E 1611E 1611E 1611F; # (◌𖄞◌𖄡◌𖄞◌𖄟; ◌𖄡◌𖄦; ◌𖄞◌𖄞◌𖄞◌𖄞◌𖄟; ◌𖄡◌𖄦; ◌𖄞◌𖄞◌𖄞◌𖄞◌𖄟; ) GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN U, GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN I +# GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN AA+GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL LENGTH MARK, GURUNG KHEMA VOWEL SIGN I +# Link in GURUNG KHEMA VOWEL SIGN U 1611E 16121 16124;16121 16127;1611E 1611E 1611E 16129 1611F;16121 16127;1611E 1611E 1611E 16129 1611F; # (◌𖄞◌𖄡◌𖄤; ◌𖄡◌𖄧; ◌𖄞◌𖄞◌𖄞◌𖄩◌𖄟; ◌𖄡◌𖄧; ◌𖄞◌𖄞◌𖄞◌𖄩◌𖄟; ) GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN U, GURUNG KHEMA VOWEL SIGN EE +# Link in GURUNG KHEMA VOWEL SIGN U 1611E 16121 16129 1611F;16121 16127;1611E 1611E 1611E 16129 1611F;16121 16127;1611E 1611E 1611E 16129 1611F; # (◌𖄞◌𖄡◌𖄩◌𖄟; ◌𖄡◌𖄧; ◌𖄞◌𖄞◌𖄞◌𖄩◌𖄟; ◌𖄡◌𖄧; ◌𖄞◌𖄞◌𖄞◌𖄩◌𖄟; ) GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN U, GURUNG KHEMA VOWEL LENGTH MARK, GURUNG KHEMA VOWEL SIGN I +# GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN AA+GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN II +# Link in GURUNG KHEMA VOWEL SIGN U 1611E 16121 16125;16121 16128;1611E 1611E 1611E 1611E 16120;16121 16128;1611E 1611E 1611E 1611E 16120; # (◌𖄞◌𖄡◌𖄥; ◌𖄡◌𖄨; ◌𖄞◌𖄞◌𖄞◌𖄞◌𖄠; ◌𖄡◌𖄨; ◌𖄞◌𖄞◌𖄞◌𖄞◌𖄠; ) GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN U, GURUNG KHEMA VOWEL SIGN AI +# Link in GURUNG KHEMA VOWEL SIGN U 1611E 16121 1611E 16120;16121 16128;1611E 1611E 1611E 1611E 16120;16121 16128;1611E 1611E 1611E 1611E 16120; # (◌𖄞◌𖄡◌𖄞◌𖄠; ◌𖄡◌𖄨; ◌𖄞◌𖄞◌𖄞◌𖄞◌𖄠; ◌𖄡◌𖄨; ◌𖄞◌𖄞◌𖄞◌𖄞◌𖄠; ) GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN U, GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN II +# Linking on GURUNG KHEMA VOWEL SIGN AA+GURUNG KHEMA VOWEL LENGTH MARK +# GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN AA+GURUNG KHEMA VOWEL LENGTH MARK, GURUNG KHEMA VOWEL SIGN I +# Link in GURUNG KHEMA VOWEL SIGN OO 1611E 16127;16121 16124;1611E 1611E 16129 1611F;16121 16124;1611E 1611E 16129 1611F; # (◌𖄞◌𖄧; ◌𖄡◌𖄤; ◌𖄞◌𖄞◌𖄩◌𖄟; ◌𖄡◌𖄤; ◌𖄞◌𖄞◌𖄩◌𖄟; ) GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN OO +# Link in GURUNG KHEMA VOWEL SIGN UU 1611E 16122 1611F;16121 16124;1611E 1611E 16129 1611F;16121 16124;1611E 1611E 16129 1611F; # (◌𖄞◌𖄢◌𖄟; ◌𖄡◌𖄤; ◌𖄞◌𖄞◌𖄩◌𖄟; ◌𖄡◌𖄤; ◌𖄞◌𖄞◌𖄩◌𖄟; ) GURUNG KHEMA VOWEL SIGN AA, GURUNG KHEMA VOWEL SIGN UU, GURUNG KHEMA VOWEL SIGN I +# Linking on KIRAT RAI VOWEL SIGN E+KIRAT RAI VOWEL SIGN E +# KIRAT RAI VOWEL SIGN E, KIRAT RAI VOWEL SIGN E+KIRAT RAI VOWEL SIGN E, KIRAT RAI VOWEL SIGN E +# Link in KIRAT RAI VOWEL SIGN AI 16D67 16D68 16D67;16D68 16D68;16D67 16D67 16D67 16D67;16D68 16D68;16D67 16D67 16D67 16D67; # (𖵨𖵨; 𖵨𖵨; 𖵨𖵨; 𖵨𖵨; 𖵨𖵨; ) KIRAT RAI VOWEL SIGN E, KIRAT RAI VOWEL SIGN AI, KIRAT RAI VOWEL SIGN E +# KIRAT RAI VOWEL SIGN AA, KIRAT RAI VOWEL SIGN E+KIRAT RAI VOWEL SIGN E, KIRAT RAI VOWEL SIGN E +# Link in KIRAT RAI VOWEL SIGN AU 16D6A 16D67;16D6A 16D67;16D63 16D67 16D67 16D67;16D6A 16D67;16D63 16D67 16D67 16D67; # (𖵪𖵧; 𖵪𖵧; 𖵪𖵧; 𖵪𖵧; 𖵪𖵧; ) KIRAT RAI VOWEL SIGN AU, KIRAT RAI VOWEL SIGN E +# Link in KIRAT RAI VOWEL SIGN AI 16D63 16D68 16D67;16D6A 16D67;16D63 16D67 16D67 16D67;16D6A 16D67;16D63 16D67 16D67 16D67; # (𖵪𖵧; 𖵪𖵧; 𖵪𖵧; 𖵪𖵧; 𖵪𖵧; ) KIRAT RAI VOWEL SIGN AA, KIRAT RAI VOWEL SIGN AI, KIRAT RAI VOWEL SIGN E +# KIRAT RAI VOWEL SIGN AA, KIRAT RAI VOWEL SIGN E, KIRAT RAI VOWEL SIGN E+KIRAT RAI VOWEL SIGN E, KIRAT RAI VOWEL SIGN E +# Link in KIRAT RAI VOWEL SIGN AI 16D69 16D68 16D67;16D6A 16D68;16D63 16D67 16D67 16D67 16D67;16D6A 16D68;16D63 16D67 16D67 16D67 16D67; # (𖵪𖵨; 𖵪𖵨; 𖵪𖵨; 𖵪𖵨; 𖵪𖵨; ) KIRAT RAI VOWEL SIGN O, KIRAT RAI VOWEL SIGN AI, KIRAT RAI VOWEL SIGN E +# Link in KIRAT RAI VOWEL SIGN AI 16D63 16D67 16D68 16D67;16D6A 16D68;16D63 16D67 16D67 16D67 16D67;16D6A 16D68;16D63 16D67 16D67 16D67 16D67; # (𖵪𖵨; 𖵪𖵨; 𖵪𖵨; 𖵪𖵨; 𖵪𖵨; ) KIRAT RAI VOWEL SIGN AA, KIRAT RAI VOWEL SIGN E, KIRAT RAI VOWEL SIGN AI, KIRAT RAI VOWEL SIGN E # # EOF diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/GenerateData.java b/unicodetools/src/main/java/org/unicode/text/UCD/GenerateData.java index 6fe566b49..cb38bf126 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/GenerateData.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/GenerateData.java @@ -948,9 +948,10 @@ public static void writeNormalizerTestSuite(String directory, String fileName) log.println("@Part4 # Canonical closures (excluding Hangul)"); log.println("#"); - final Map decompositions = new TreeMap(); - final Map> decomposablesByFirstCodePoint = new TreeMap(); - final Map> decomposablesByLastCodePoint = new TreeMap(); + final Map decompositions = new TreeMap<>(); + final Set compositions = new TreeSet<>(); + final Map> composablesByFirstCodePoint = new TreeMap<>(); + final Map> composablesByLastCodePoint = new TreeMap<>(); for (int cp = 0; cp <= 0x10FFFF; ++cp) { if (cp >= 0xAC00 && cp <= 0xD7A3) { continue; @@ -959,13 +960,14 @@ public static void writeNormalizerTestSuite(String directory, String fileName) String decomposition = Default.nfd().normalize(cp); if (!decomposition.equals(c)) { decompositions.put(c, decomposition); - if (decomposition.codePointCount(0, decomposition.length()) > 1) { + if (Default.nfc().normalize(c).equals(c)) { + compositions.add(decomposition); int first = decomposition.codePointAt(0); int last = decomposition.codePointBefore(decomposition.length()); - decomposablesByFirstCodePoint + composablesByFirstCodePoint .computeIfAbsent(first, key -> new TreeSet<>()) .add(c); - decomposablesByLastCodePoint + composablesByLastCodePoint .computeIfAbsent(last, key -> new TreeSet<>()) .add(c); } @@ -1000,12 +1002,14 @@ public static void writeNormalizerTestSuite(String directory, String fileName) } } + System.out.println("Writing Part 5"); + log.println("#"); + log.println("@Part5 # Chained compositions"); + log.println("#"); + Set links = new TreeSet<>(); - for (String decomposition : decompositions.values()) { + for (String decomposition : compositions) { int first = decomposition.codePointAt(0); - if (decomposition.length() == UTF16.getCharCount(first)) { - continue; - } int second; for (int i = UTF16.getCharCount(first); i < decomposition.length(); @@ -1018,15 +1022,17 @@ public static void writeNormalizerTestSuite(String directory, String fileName) for (String link : links) { int first = link.codePointAt(0); int second = link.codePointBefore(link.length()); - if (decomposablesByLastCodePoint.containsKey(first) - && decomposablesByFirstCodePoint.containsKey(second)) { + if (composablesByLastCodePoint.containsKey(first) + && composablesByFirstCodePoint.containsKey(second)) { + log.println("# Linking on " + Default.ucd().getName(first) + "+" + Default.ucd().getName(second)); System.out.println( Default.ucd().getName(first) + "+" + Default.ucd().getName(second) + "?"); - for (String firstCandidate : decomposablesByLastCodePoint.get(first)) { - for (String secondCandidate : decomposablesByFirstCodePoint.get(second)) { + for (String firstCandidate : composablesByLastCodePoint.get(first)) { + for (String secondCandidate : composablesByFirstCodePoint.get(second)) { String firstDecomposition = Default.nfd().normalize(firstCandidate); String secondDecomposition = Default.nfd().normalize(secondCandidate); String decomposition = firstDecomposition + secondDecomposition; + log.println("# " + Default.ucd().getName(firstDecomposition) + "+" + Default.ucd().getName(secondDecomposition)); System.out.println( Default.ucd().getName(firstCandidate) + "+" @@ -1055,7 +1061,9 @@ public static void writeNormalizerTestSuite(String directory, String fileName) if (!s.equals(decomposition) && Default.nfd() .normalize(s) - .equals(decomposition)) { + .equals(decomposition) + && s.codePoints().anyMatch(cp -> Default.nfd() + .normalize(cp).contains(link))) { for (int j = 0; j < s.length(); ++j) { if (Default.nfd() .normalize(s.substring(0, j)) @@ -1066,6 +1074,10 @@ public static void writeNormalizerTestSuite(String directory, String fileName) return; } } + s.codePoints().forEach(cp -> { if (Default.nfd() + .normalize(cp).contains(link)) { + log.println("# Link in " + Default.ucd().getName(cp)); + }}); writeLine(s, log, true); System.out.println(Default.ucd().getName(s)); }