From 60104953fc439987b812db62e555a18895a34de7 Mon Sep 17 00:00:00 2001 From: macchiati Date: Thu, 15 Aug 2024 18:34:30 -0700 Subject: [PATCH] CLDR-17535 General cleanup --- common/supplemental/supplementalData.xml | 10 +- .../cldr/tool/GenerateLikelySubtags.java | 248 ++++++++++-------- .../java/org/unicode/cldr/tool/LSRSource.java | 111 ++++++++ .../org/unicode/cldr/tool/LangTagsData.java | 182 +++++++------ .../org/unicode/cldr/util/CLDRLocale.java | 12 + .../cldr/util/LanguageTagCanonicalizer.java | 31 ++- .../unicode/cldr/util/LocaleScriptInfo.java | 6 +- .../cldr/util/SupplementalDataInfo.java | 2 +- 8 files changed, 403 insertions(+), 199 deletions(-) create mode 100644 tools/cldr-code/src/main/java/org/unicode/cldr/tool/LSRSource.java diff --git a/common/supplemental/supplementalData.xml b/common/supplemental/supplementalData.xml index 84a21e51c99..b6cdfb0e08b 100644 --- a/common/supplemental/supplementalData.xml +++ b/common/supplemental/supplementalData.xml @@ -1884,7 +1884,7 @@ XXX Code for transations where no currency is involved - + @@ -2068,7 +2068,7 @@ XXX Code for transations where no currency is involved - + @@ -2266,10 +2266,10 @@ XXX Code for transations where no currency is involved - + - + @@ -2298,7 +2298,7 @@ XXX Code for transations where no currency is involved - + diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateLikelySubtags.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateLikelySubtags.java index 23dc80316f4..492bcadaad4 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateLikelySubtags.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateLikelySubtags.java @@ -30,7 +30,7 @@ import org.unicode.cldr.draft.ScriptMetadata.Info; import org.unicode.cldr.tool.GenerateMaximalLocales.LocaleOverride; import org.unicode.cldr.tool.GenerateMaximalLocales.LocaleStringComparator; -import org.unicode.cldr.tool.LangTagsData.LSRSource; +import org.unicode.cldr.tool.LangTagsData.Errors; import org.unicode.cldr.tool.Option.Options; import org.unicode.cldr.tool.Option.Params; import org.unicode.cldr.util.CLDRConfig; @@ -101,6 +101,7 @@ private enum MyOptions { population(new Params().setHelp("Show population data used")), order(new Params().setHelp("Show the priority order for langauge data")), debug(new Params().setHelp("Show other debug info")), + json(new Params().setHelp("Show json error data")), watch( new Params() .setHelp( @@ -154,6 +155,11 @@ public static void main(String[] args) throws IOException { .forEach(x -> temp.put(x, getTypeFromCasedSubtag(x))); WATCH_PAIRS = ImmutableMap.copyOf(temp); } + boolean json = MyOptions.json.option.doesOccur(); + if (json) { + Errors jsonErrors = LangTagsData.getProcessErrors(); + jsonErrors.printAll(); + } Map old = supplementalData.getLikelySubtags(); Map oldOrigins = supplementalData.getLikelyOrigins(); @@ -181,6 +187,13 @@ public static void main(String[] args) throws IOException { String newValue = result.get(source); String removal = itemsRemoved.get(source); + if (newValue == null) { + LSRSource silValue = silData.get(source); + if (silValue != null) { + newValue = silValue.getLsrString(); + } + } + if (Objects.equal(oldValue, newValue)) { continue; } @@ -384,8 +397,8 @@ public static void main(String[] args) throws IOException { {"und_005", "pt_Latn_BR"}, {"vo", "vo_Latn_001"}, {"vo_Latn", "vo_Latn_001"}, - {"yi", "yi_Hebr_001"}, - {"yi_Hebr", "yi_Hebr_001"}, + // {"yi", "yi_Hebr_001"}, + // {"yi_Hebr", "yi_Hebr_001"}, {"yue", "yue_Hant_HK"}, {"yue_Hant", "yue_Hant_HK"}, {"yue_Hans", "yue_Hans_CN"}, @@ -441,6 +454,13 @@ public static void main(String[] args) throws IOException { {"oc_ES", "oc_Latn_ES"}, {"os", "os_Cyrl_GE"}, {"os_Cyrl", "os_Cyrl_GE"}, + + // new additions for compatibility with old + {"und_419", "es_Latn_419"}, + {"und_ZM", "bem_Latn_ZM"}, + {"und_CC", "ms_Arab_CC"}, + {"und_SL", "kri_Latn_SL"}, + {"und_SS", "ar_Arab_SS"}, }); /** @@ -630,6 +650,8 @@ enum OutputStyle { private static final Joiner JOIN_SPACE = Joiner.on(' '); + private static final Joiner JOIN_LS = Joiner.on(CldrUtility.LINE_SEPARATOR); + private static Map generatePopulationData(Map toMaximized) { // we are going to try a different approach. // first gather counts for maximized values @@ -699,8 +721,13 @@ private static Map generatePopulationData(Map to if (script == null) { script = LocaleScriptInfo.getScriptFromLocaleOrSupplemental(writtenLanguage); if (script == null) { - noPopulationData.add(writtenLanguage); - continue; + LSRSource silLSR = silData.get(writtenLanguage); + if (silLSR != null) { + script = silLSR.getScript(); + } else { + noPopulationData.add(writtenLanguage); + continue; + } } localeToScriptCache.put(writtenLanguage, script); } @@ -714,22 +741,7 @@ private static Map generatePopulationData(Map to } } if (!noPopulationData.isEmpty()) { - System.out.println("script data to add"); - Set stillBad = new TreeSet<>(); for (String lang : noPopulationData) { - LSRSource silLSR = silData.get(lang); - if (silLSR == null) { - stillBad.add(lang); - } else { - System.out.println( - " "); - } - } - for (String lang : stillBad) { System.out.println( JOIN_TAB.join("No script in pop. data for", lang, getNameSafe(lang))); } @@ -1287,6 +1299,15 @@ public static String printingName(String locale, Joiner spacing) { : english.getName(CLDRFile.TERRITORY_NAME, region))); } + static final String SEPARATOR = + OUTPUT_STYLE == OutputStyle.C || OUTPUT_STYLE == OutputStyle.C_ALT + ? CldrUtility.LINE_SEPARATOR + : "\t"; + static final Joiner spacing = + Joiner.on(OUTPUT_STYLE == OutputStyle.PLAINTEXT ? "\t" : "‧").useForNull("∅"); + + static final String arrow = OUTPUT_STYLE == OutputStyle.PLAINTEXT ? "\t⇒\t" : "\t➡ "; + private static File printLikelySubtags(Map fluffup) throws IOException { final File genDir = new File(CLDRPaths.GEN_DIRECTORY, "supplemental"); final File genFile = @@ -1296,45 +1317,24 @@ private static File printLikelySubtags(Map fluffup) throws IOExc System.out.println("Writing to " + genFile); // set based on above - final String SEPARATOR = - OUTPUT_STYLE == OutputStyle.C || OUTPUT_STYLE == OutputStyle.C_ALT - ? CldrUtility.LINE_SEPARATOR - : "\t"; - Joiner spacing = - Joiner.on(OUTPUT_STYLE == OutputStyle.PLAINTEXT ? "\t" : " ‧ ").useForNull("∅"); - - final String arrow = OUTPUT_STYLE == OutputStyle.PLAINTEXT ? "\t⇒\t" : "\t➡ "; - try (PrintWriter out = FileUtilities.openUTF8Writer(genFile)) { String header = OUTPUT_STYLE != OutputStyle.XML ? "const MapToMaximalSubtags default_subtags[] = {" - : "" - + CldrUtility.LINE_SEPARATOR - + "" - + CldrUtility.LINE_SEPARATOR - + "" - + CldrUtility.LINE_SEPARATOR - + "" - + CldrUtility.LINE_SEPARATOR - + "" - + CldrUtility.LINE_SEPARATOR - + " " - + CldrUtility.LINE_SEPARATOR - + " "; + : JOIN_LS.join( + "", + "", + "", + "", + "", + " ", + " "); String footer = OUTPUT_STYLE != OutputStyle.XML ? SEPARATOR + "};" @@ -1343,61 +1343,101 @@ private static File printLikelySubtags(Map fluffup) throws IOExc + ""; out.println(header); boolean first = true; - Set keys = new TreeSet<>(new LocaleStringComparator()); - keys.addAll(fluffup.keySet()); - for (String printingLocale : keys) { - String printingTarget = fluffup.get(printingLocale); - String comment = - printingName(printingLocale, spacing) - + arrow - + printingName(printingTarget, spacing); - - if (OUTPUT_STYLE == OutputStyle.XML) { - out.println( - "\t\t" - + "\t\t" - + ""); - } else { - if (first) { - first = false; - } else { - out.print(","); - } - if (comment.length() > 70 && SEPARATOR.equals(CldrUtility.LINE_SEPARATOR)) { - comment = - printingName(printingLocale, spacing) - + SEPARATOR - + " // " - + arrow - + printingName(printingTarget, spacing); + printLine(fluffup, Map.of(), first, out); + + if (OUTPUT_STYLE == OutputStyle.XML) { + out.println(" "); + } + + // Now add from silData + // filter to only languages that are not already in + Map silMap = new HashMap<>(); + Map silOrigins = new HashMap<>(); + + for (Entry entry : silData.entrySet()) { + CLDRLocale source = CLDRLocale.getInstance(entry.getKey()); + String lang = source.getLanguage(); + if (!fluffup.containsKey(lang)) { + silMap.put(entry.getKey(), entry.getValue().getLsrString()); + if (!entry.getValue().getSources().isEmpty()) { + silOrigins.put(entry.getKey(), entry.getValue().getSourceString()); } - out.print( - " {" - + SEPARATOR - + " // " - + comment - + SEPARATOR - + " \"" - + printingLocale - + "\"," - + SEPARATOR - + " \"" - + printingTarget - + "\"" - + CldrUtility.LINE_SEPARATOR - + " }"); } } + printLine(silMap, silOrigins, first, out); + out.println(footer); out.close(); } return genFile; } + + public static void printLine( + Map toPrint, + Map origins, + boolean first, + PrintWriter out) { + Set keys = new TreeSet<>(new LocaleStringComparator()); + keys.addAll(toPrint.keySet()); + boolean noUndYet = true; + for (String printingLocale : keys) { + String printingTarget = toPrint.get(printingLocale); + String origin = origins.get(printingLocale); + String comment = + printingName(printingLocale, spacing) + + arrow + + printingName(printingTarget, spacing); + + if (OUTPUT_STYLE == OutputStyle.XML) { + if (noUndYet) { + if (printingLocale.startsWith("und")) { + noUndYet = false; + out.println( + " "); + } + } + out.println( + "\t\t" + + "\t\t" + + ""); + } else { + if (first) { + first = false; + } else { + out.print(","); + } + if (comment.length() > 70 && SEPARATOR.equals(CldrUtility.LINE_SEPARATOR)) { + comment = + printingName(printingLocale, spacing) + + SEPARATOR + + " // " + + arrow + + printingName(printingTarget, spacing); + } + out.print( + " {" + + SEPARATOR + + " // " + + comment + + SEPARATOR + + " \"" + + printingLocale + + "\"," + + SEPARATOR + + " \"" + + printingTarget + + "\"" + + CldrUtility.LINE_SEPARATOR + + " }"); + } + } + } } diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/LSRSource.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/LSRSource.java new file mode 100644 index 00000000000..1699203fcf4 --- /dev/null +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/LSRSource.java @@ -0,0 +1,111 @@ +package org.unicode.cldr.tool; + +import com.google.common.base.Joiner; +import com.google.common.base.Splitter; +import com.google.common.collect.Comparators; +import com.google.common.collect.ComparisonChain; +import com.google.common.collect.ImmutableSortedSet; +import java.util.Comparator; +import java.util.Objects; +import java.util.Set; +import org.unicode.cldr.util.CLDRConfig; +import org.unicode.cldr.util.CLDRFile; +import org.unicode.cldr.util.CLDRLocale; + +public class LSRSource implements Comparable { + private static final Joiner JOIN_SPACE = Joiner.on(' '); + private static final Splitter SPLIT_SPACE = Splitter.on(' ').omitEmptyStrings(); + private final CLDRLocale cldrLocale; + private final Set sources; + + LSRSource(String lang, String script, String region, String sources) { + cldrLocale = CLDRLocale.getInstance(lang, script, region); + this.sources = ImmutableSortedSet.copyOf(SPLIT_SPACE.splitToList(sources)); + } + + public String getLanguage() { + return cldrLocale.getLanguage(); + } + + public String getScript() { + return cldrLocale.getScript(); + } + + public String getRegion() { + return cldrLocale.getRegion(); + } + + public Set getSources() { + return sources; + } + + public String getLsrString() { + return cldrLocale.toString(); + } + + @Override + public int compareTo(LSRSource other) { + return ComparisonChain.start() + .compare(cldrLocale, other.cldrLocale) + .compare( + sources, + other.sources, + Comparators.lexicographical(Comparator.naturalOrder())) + .result(); + } + + @Override + public int hashCode() { + return Objects.hash(cldrLocale, sources); + } + + @Override + public boolean equals(Object obj) { + if (obj == this) return true; + if (!(obj instanceof LSRSource)) return false; + + LSRSource other = (LSRSource) obj; + return Objects.equals(cldrLocale, other.cldrLocale) + && Objects.equals(sources, other.sources); + } + + @Override + public String toString() { + return cldrLocale.toString() + " // " + getSources(); + } + + public String line(String source) { + final CLDRFile english = CLDRConfig.getInstance().getEnglish(); + + // + // + final String target = cldrLocale.toString(); + final String result = + "" + + "\t"; + return result; + } + + public String getSourceString() { + return JOIN_SPACE.join(getSources()); + } + + public CLDRLocale getCldrLocale() { + return cldrLocale; + } + + // public static String combineLSR(String lang, String script, String region) { + // return (lang.isEmpty() ? "und" : lang) + // + (script.isEmpty() ? "" : "_" + script) + // + (region.isEmpty() ? "" : "_" + region); + // } +} diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/LangTagsData.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/LangTagsData.java index 7ddfbbd863f..488e9f41641 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/LangTagsData.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/LangTagsData.java @@ -1,10 +1,10 @@ package org.unicode.cldr.tool; import com.google.common.base.Splitter; +import com.google.common.collect.ImmutableMultimap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Multimap; import com.google.common.collect.TreeMultimap; -import com.ibm.icu.impl.Row; import com.ibm.icu.util.Output; import java.io.IOException; import java.io.UncheckedIOException; @@ -12,6 +12,7 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.Collection; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -21,9 +22,12 @@ import java.util.regex.Pattern; import org.unicode.cldr.util.CLDRConfig; import org.unicode.cldr.util.CLDRFile; +import org.unicode.cldr.util.CLDRLocale; import org.unicode.cldr.util.CLDRPaths; +import org.unicode.cldr.util.CldrUtility; import org.unicode.cldr.util.Iso639Data; import org.unicode.cldr.util.Iso639Data.Type; +import org.unicode.cldr.util.LanguageTagCanonicalizer; import org.unicode.cldr.util.LanguageTagParser; import org.unicode.cldr.util.StandardCodes.LstrType; import org.unicode.cldr.util.Validity; @@ -40,7 +44,7 @@ public class LangTagsData { private final Validity validity = Validity.getInstance(); private static final CLDRConfig CLDR_CONFIG = CLDRConfig.getInstance(); - private static final CLDRFile english = CLDR_CONFIG.getEnglish(); + static final CLDRFile english = CLDR_CONFIG.getEnglish(); private static final LangTagsData INSTANCE = new LangTagsData(); @@ -84,6 +88,7 @@ private Map readJson() { Output lastFull = new Output<>(); Map result = new TreeMap<>(); + LanguageTagCanonicalizer langCanoner = new LanguageTagCanonicalizer(null); try { Files.lines(path) .forEach( @@ -99,20 +104,42 @@ private Map readJson() { lastFull.value = value; break; case "tag": + if (lastFull.value == null) { + break; + } try { - String fullLang = - ltpFull.set(lastFull.value).getLanguage(); + ltpFull.set(lastFull.value); + ltpTag.set(value); if (isIllFormed(lastFull.value, ltpFull) - || isIllFormed(value, ltpTag.set(value))) { + || isIllFormed(value, ltpTag)) { processErrors.put( Errors.Type.ill_formed_tags, value, lastFull.value, ""); } else { - String reference = SIL; + final String fixedTag = + langCanoner.transform(value); + final String fixedFull = + langCanoner.transform(lastFull.value); + if (!fixedTag.equals(value) + || !fixedFull.equals(lastFull.value)) { + processErrors.put( + Errors.Type.canonicalizing, + value, + lastFull.value, + "mapped to: " + + fixedTag + + " ➡ " + + fixedFull); + ltpTag.set(fixedTag); + ltpFull.set(fixedFull); + } + String fullLang = ltpFull.getLanguage(); final String fullScript = ltpFull.getScript(); String fullRegion = ltpFull.getRegion(); + + String reference = SIL; if (fullRegion.equals("ZZ") || fullRegion.equals("001")) { Collection tempRegions = @@ -125,11 +152,18 @@ private Map readJson() { } } - String tagLang = ltpTag.getLanguage(); - String tagScript = ltpTag.getScript(); - String tagRegion = ltpTag.getRegion(); + final String tagLang = ltpTag.getLanguage(); + final String tagScript = ltpTag.getScript(); + final String tagRegion = ltpTag.getRegion(); - if (!tagLang.equals(fullLang) + if (!tagScript.isEmpty() + && !tagRegion.isEmpty()) { + processErrors.put( + Errors.Type.tag_is_full, + value, + lastFull.value, + ""); + } else if (!tagLang.equals(fullLang) || (!tagScript.isEmpty() && !tagScript.equals( fullScript)) @@ -149,7 +183,7 @@ private Map readJson() { errors)) { add( result, - value, + fixedTag, fullLang, fullScript, fullRegion, @@ -176,7 +210,55 @@ private Map readJson() { } } }); - return result; + + // check for items that need context + + Set toRemove = new LinkedHashSet<>(); + for (Entry entry : result.entrySet()) { + // if we have lang_script or lang_region, we must have lang + final String source = entry.getKey(); + if (source.equals("lfn_Cyrl")) { + int debug = 0; + } + if (source.contains("_")) { + // we have either aaa_Dddd or aaa_EEE (we know the source can't have 3 fields) + CLDRLocale clocale = CLDRLocale.getInstance(source); + final String language = clocale.getLanguage(); + LSRSource fullForLanguage = result.get(language); + if (fullForLanguage == null) { + toRemove.add(source); + processErrors.put( + Errors.Type.language_of_tag_missing, + source, + entry.getValue().getLsrString(), + "but no mapping for " + language); + } else { + CLDRLocale targetForLanguage = fullForLanguage.getCldrLocale(); + CLDRLocale target = entry.getValue().getCldrLocale(); + // The missing value in LSRSource must not be the same as what would come in + // that is, if we have aaa => aaa_Bbbb_CC, then we cannot have: + // aaa_Dddd => aaa_Dddd_CC, nor + // aaa_EE => aaa_Bbbb_EE, nor + if (target.getLanguage().equals(targetForLanguage.getLanguage()) + || target.getScript().equals(targetForLanguage.getScript())) { + toRemove.add(source); + processErrors.put( + Errors.Type.redundant_mapping, + source, + entry.getValue().getLsrString(), + "because: " + language + " ➡ " + targetForLanguage); + } + } + } + } + for (String badKey : toRemove) { + result.remove(badKey); + } + + // protect the results + + processErrors.data = CldrUtility.protectCollection(processErrors.data); + return CldrUtility.protectCollection(result); } catch (IOException ex) { throw new UncheckedIOException(ex); } @@ -235,7 +317,7 @@ private Multimap readWikidata() { } catch (IOException ex) { throw new UncheckedIOException(ex); } - return result; + return ImmutableMultimap.copyOf(result); } private void add( @@ -254,13 +336,18 @@ private void add( result.put(source, newVersion); } - private static class Errors { + public static class Errors { public enum Type { ill_formed_tags("Ill-formed tags"), already_CLDR("Language already in CLDR"), tag_not_in_full("tag ⊄ full"), exception("exception"), - skipping_scope("Skipping scope, SIL"); + skipping_scope("Skipping scope, SIL"), + tag_is_full("Tag must not have both script and region"), + language_of_tag_missing("Missing tag for just the language"), + redundant_mapping( + "aaa => aaa_Bbbb_CC makes redundant aaa_Dddd => aaa_Dddd_CC & aaa_EE => aaa_Bbbb_EE"), + canonicalizing("either the source or target are not canonical"); private final String printable; @@ -269,7 +356,11 @@ private Type(String printable) { } } - public Multimap data = TreeMultimap.create(); + private Multimap data = TreeMultimap.create(); + + public Multimap getData() { + return data; + } public void put( Type illFormedTags, String tagValue, String fullValue, String errorMessage) { @@ -293,63 +384,4 @@ public void printAll() { } } } - - static class LSRSource implements Comparable { - final Row.R4 data; - - LSRSource(String lang, String script, String region, String source) { - if (script.contains("Soyo") || region.contains("Soyo")) { - int debug = 0; - } - data = Row.of(lang, script, region, source); - data.freeze(); - } - - @Override - public String toString() { - return combineLSR(data.get0(), data.get1(), data.get2()) + " // " + data.get3(); - } - - @Override - public int compareTo(LSRSource o) { - return data.compareTo(o.data); - } - - @Override - public int hashCode() { - return data.hashCode(); - } - - @Override - public boolean equals(Object obj) { - return data.equals(obj); - } - - public String line(String source) { - // TODO Auto-generated method stub - // - // - final String target = combineLSR(data.get0(), data.get1(), data.get2()); - final String origin = data.get3(); - final String result = - "" - + "\t"; - return result; - } - - public static String combineLSR(String lang, String script, String region) { - return lang - + (script.isEmpty() ? "" : "_" + script) - + (region.isEmpty() ? "" : "_" + region); - } - } } diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRLocale.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRLocale.java index 37a59e943f0..5bfdf3d0472 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRLocale.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRLocale.java @@ -672,4 +672,16 @@ public int getRank() { return 1 + getParent().getRank(); } } + + // Non-optimized version for now + public static CLDRLocale getInstance(String lang, String script, String region) { + return getInstance( + (lang.isEmpty() || lang.equals("root") ? "und" : lang) + + (script.isEmpty() ? "" : "_" + script) + + (region.isEmpty() ? "" : "_" + region)); + } + + public String getRegion() { + return getCountry(); + } } diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/LanguageTagCanonicalizer.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/LanguageTagCanonicalizer.java index a56ba239a5b..1c2c171a01c 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/LanguageTagCanonicalizer.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/LanguageTagCanonicalizer.java @@ -29,24 +29,35 @@ public class LanguageTagCanonicalizer implements StringTransform { private final LanguageTagParser ltp1 = new LanguageTagParser(); private final LanguageTagParser ltp2 = new LanguageTagParser(); + /** Use a parameter to specify LIKELY_FAVOR_SCRIPT or no minimization */ public LanguageTagCanonicalizer() { this(LstrType.script); } + @Deprecated public LanguageTagCanonicalizer(boolean favorRegion) { this(favorRegion ? LstrType.region : LstrType.script); } - public LanguageTagCanonicalizer(LstrType lstrType) { - switch (lstrType) { - case region: - likely = LIKELY_FAVOR_REGION; - break; - case script: - likely = LIKELY_FAVOR_SCRIPT; - break; - default: - likely = null; + /** + * Choose the style of minimization, or null for none. + * + * @param minimizationTypeOrNull + */ + public LanguageTagCanonicalizer(LstrType minimizationTypeOrNull) { + if (minimizationTypeOrNull == null) { + likely = null; // don't minimize. + } else { + switch (minimizationTypeOrNull) { + case region: + likely = LIKELY_FAVOR_REGION; + break; + case script: + likely = LIKELY_FAVOR_SCRIPT; + break; + default: + likely = null; + } } } diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/LocaleScriptInfo.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/LocaleScriptInfo.java index d5c30317235..a22429a3c86 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/LocaleScriptInfo.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/LocaleScriptInfo.java @@ -70,6 +70,7 @@ public static String getScriptFromLocale(String locale) { /** * Get the script code (aka short property name, like Latn) from the supplemental languageData. + * Take the first one if there are 2. * * @param locale * @return null if fails @@ -82,10 +83,7 @@ public static String getScriptFromSupplementalData(String locale) { for (BasicLanguageData datum : basicLanguageData.values()) { final Set scripts = datum.getScripts(); boolean isPrimary = datum.getType() == BasicLanguageData.Type.primary; - if (scripts.size() != 1) { - if (scripts.size() > 1 && isPrimary) { - break; - } + if (scripts.isEmpty()) { continue; } String script = scripts.iterator().next(); diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/SupplementalDataInfo.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/SupplementalDataInfo.java index 0be181a193c..a1491b31f63 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/SupplementalDataInfo.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/SupplementalDataInfo.java @@ -451,7 +451,7 @@ public BasicLanguageData addScript(String script) { throw new IllegalArgumentException("Illegal Script: " + script); } if (scripts == Collections.EMPTY_SET) { - scripts = new TreeSet<>(); + scripts = new LinkedHashSet<>(); // retain order } scripts.add(script); return this;