From c5d48a81cd6752cf6a20f0d5e84f850c5d8d9bac Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Thu, 1 Feb 2024 01:20:27 +0000 Subject: [PATCH] UTC-178-A13: jg long alias Teh_Marbuta_Goal (#676) mostly by Robin fixing the generator code and making it less hacky --------- Co-authored-by: Robin Leroy --- .../data/ucd/dev/PropertyValueAliases.txt | 4 +- .../org/unicode/props/UcdPropertyValues.java | 2 +- .../unicode/text/UCD/MakeUnicodeFiles.java | 7 +-- .../text/UCD/ToolUnicodePropertySource.java | 50 +++++++++---------- 4 files changed, 30 insertions(+), 33 deletions(-) diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index 366e7a6c4..f7dc7db04 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -1,5 +1,5 @@ # PropertyValueAliases-16.0.0.txt -# Date: 2024-01-23, 01:51:24 GMT +# Date: 2024-01-31 # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -1085,7 +1085,7 @@ jg ; Syriac_Waw ; Syriac_Waw jg ; Tah ; Tah jg ; Taw ; Taw jg ; Teh_Marbuta ; Teh_Marbuta -jg ; Teh_Marbuta_Goal ; Hamza_On_Heh_Goal +jg ; Teh_Marbuta_Goal ; Teh_Marbuta_Goal ; Hamza_On_Heh_Goal jg ; Teth ; Teth jg ; Thin_Yeh ; Thin_Yeh jg ; Vertical_Tail ; Vertical_Tail diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java index 18efb18e4..8423533b1 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java @@ -1305,7 +1305,7 @@ public enum Joining_Group_Values implements Named { Tah("Tah"), Taw("Taw"), Teh_Marbuta("Teh_Marbuta"), - Hamza_On_Heh_Goal("Teh_Marbuta_Goal"), + Teh_Marbuta_Goal("Teh_Marbuta_Goal", "Hamza_On_Heh_Goal"), Teth("Teth"), Thin_Yeh("Thin_Yeh"), Vertical_Tail("Vertical_Tail"), diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java index c972a2cba..ac8712468 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/MakeUnicodeFiles.java @@ -1054,13 +1054,10 @@ public static void generateValueAliasFile(String filename) throws IOException { // HACK Tabber mt = mt2; if (l.size() == 1) { - if (propName.equals("Canonical_Combining_Class")) { - continue; - } l.add(0, l.get(0)); // double up } else if (propName.equals("Canonical_Combining_Class")) { - if (l.size() == 2) { - l.add(l.get(1)); // double up final value + if (l.get(1).equals(l.get(0)) && l.get(2).equals(l.get(0))) { + continue; } mt = mt3; } else if (l.size() == 2 && propName.equals("Decomposition_Type")) { diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java b/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java index a322758f0..1615491b4 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java @@ -2007,19 +2007,21 @@ public List _getValueAliases(String valueAlias, List result) { UCD_Names.EXTRA_GENERAL_CATEGORY, result); case UCD_Types.COMBINING_CLASS >> 8: - addUnique( - String.valueOf( - Utility.lookupShort( - valueAlias, - UCD_Names.LONG_COMBINING_CLASS, - true)), - result); - return lookup( - valueAlias, - UCD_Names.LONG_COMBINING_CLASS, - UCD_Names.COMBINING_CLASS, - null, - result); + // The `lookup` function does lookup by long value, and returns + // (long, short, additional aliases). + // For CCC we want to support lookup by long value here, but to + // return (numeric, short, long). + short numericCCC = + Utility.lookupShort( + valueAlias, UCD_Names.LONG_COMBINING_CLASS, true); + result.add(Short.toString(numericCCC)); + result.add( + Utility.getUnskeleton( + UCD_Names.COMBINING_CLASS[numericCCC], true)); + result.add( + Utility.getUnskeleton( + UCD_Names.LONG_COMBINING_CLASS[numericCCC], true)); + return result; case UCD_Types.BIDI_CLASS >> 8: return lookup( valueAlias, @@ -2028,17 +2030,11 @@ public List _getValueAliases(String valueAlias, List result) { null, result); case UCD_Types.DECOMPOSITION_TYPE >> 8: - lookup( - valueAlias, - UCD_Names.LONG_DECOMPOSITION_TYPE, - FIXED_DECOMPOSITION_TYPE, - null, - result); return lookup( valueAlias, UCD_Names.LONG_DECOMPOSITION_TYPE, - UCD_Names.DECOMPOSITION_TYPE, - null, + TITLECASE_SHORT_DECOMPOSITION_TYPE, + LONG_TO_LOWERCASE_SHORT_DECOMPOSITION_TYPE, result); case UCD_Types.NUMERIC_TYPE >> 8: return lookup( @@ -2285,9 +2281,9 @@ static List lookup( // System.out.println("=>" + aux[pos]); if (aux != null) { final int pos = Utility.lookupShort(valueAlias, main, true); - UnicodeProperty.addUnique(aux[pos], result); + result.add(aux[pos]); } - UnicodeProperty.addUnique(valueAlias, result); + result.add(valueAlias); if (aux2 != null) { final Set xtra = aux2.getAll(valueAlias); if (xtra != null) { @@ -2384,13 +2380,17 @@ static boolean isWellFormedLanguageTag(String tag) { YNTF.putAll("Maybe", Arrays.asList(MAYBE_VALUES)); } - private static final String[] FIXED_DECOMPOSITION_TYPE = + private static final String[] TITLECASE_SHORT_DECOMPOSITION_TYPE = new String[UCD_Names.DECOMPOSITION_TYPE.length]; + private static final Relation LONG_TO_LOWERCASE_SHORT_DECOMPOSITION_TYPE = + new Relation(new HashMap>(), LinkedHashSet.class); static { for (int i = 0; i < UCD_Names.DECOMPOSITION_TYPE.length; ++i) { - FIXED_DECOMPOSITION_TYPE[i] = + TITLECASE_SHORT_DECOMPOSITION_TYPE[i] = Utility.getUnskeleton(UCD_Names.DECOMPOSITION_TYPE[i], true); + LONG_TO_LOWERCASE_SHORT_DECOMPOSITION_TYPE.put( + UCD_Names.LONG_DECOMPOSITION_TYPE[i], UCD_Names.DECOMPOSITION_TYPE[i]); } }