diff --git a/common/supplemental/supplementalData.xml b/common/supplemental/supplementalData.xml
index 84a21e51c99..b6cdfb0e08b 100644
--- a/common/supplemental/supplementalData.xml
+++ b/common/supplemental/supplementalData.xml
@@ -1884,7 +1884,7 @@ XXX Code for transations where no currency is involved
-
+
@@ -2068,7 +2068,7 @@ XXX Code for transations where no currency is involved
-
+
@@ -2266,10 +2266,10 @@ XXX Code for transations where no currency is involved
-
+
-
+
@@ -2298,7 +2298,7 @@ XXX Code for transations where no currency is involved
-
+
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateLikelySubtags.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateLikelySubtags.java
index 23dc80316f4..492bcadaad4 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateLikelySubtags.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateLikelySubtags.java
@@ -30,7 +30,7 @@
import org.unicode.cldr.draft.ScriptMetadata.Info;
import org.unicode.cldr.tool.GenerateMaximalLocales.LocaleOverride;
import org.unicode.cldr.tool.GenerateMaximalLocales.LocaleStringComparator;
-import org.unicode.cldr.tool.LangTagsData.LSRSource;
+import org.unicode.cldr.tool.LangTagsData.Errors;
import org.unicode.cldr.tool.Option.Options;
import org.unicode.cldr.tool.Option.Params;
import org.unicode.cldr.util.CLDRConfig;
@@ -101,6 +101,7 @@ private enum MyOptions {
population(new Params().setHelp("Show population data used")),
order(new Params().setHelp("Show the priority order for langauge data")),
debug(new Params().setHelp("Show other debug info")),
+ json(new Params().setHelp("Show json error data")),
watch(
new Params()
.setHelp(
@@ -154,6 +155,11 @@ public static void main(String[] args) throws IOException {
.forEach(x -> temp.put(x, getTypeFromCasedSubtag(x)));
WATCH_PAIRS = ImmutableMap.copyOf(temp);
}
+ boolean json = MyOptions.json.option.doesOccur();
+ if (json) {
+ Errors jsonErrors = LangTagsData.getProcessErrors();
+ jsonErrors.printAll();
+ }
Map old = supplementalData.getLikelySubtags();
Map oldOrigins = supplementalData.getLikelyOrigins();
@@ -181,6 +187,13 @@ public static void main(String[] args) throws IOException {
String newValue = result.get(source);
String removal = itemsRemoved.get(source);
+ if (newValue == null) {
+ LSRSource silValue = silData.get(source);
+ if (silValue != null) {
+ newValue = silValue.getLsrString();
+ }
+ }
+
if (Objects.equal(oldValue, newValue)) {
continue;
}
@@ -384,8 +397,8 @@ public static void main(String[] args) throws IOException {
{"und_005", "pt_Latn_BR"},
{"vo", "vo_Latn_001"},
{"vo_Latn", "vo_Latn_001"},
- {"yi", "yi_Hebr_001"},
- {"yi_Hebr", "yi_Hebr_001"},
+ // {"yi", "yi_Hebr_001"},
+ // {"yi_Hebr", "yi_Hebr_001"},
{"yue", "yue_Hant_HK"},
{"yue_Hant", "yue_Hant_HK"},
{"yue_Hans", "yue_Hans_CN"},
@@ -441,6 +454,13 @@ public static void main(String[] args) throws IOException {
{"oc_ES", "oc_Latn_ES"},
{"os", "os_Cyrl_GE"},
{"os_Cyrl", "os_Cyrl_GE"},
+
+ // new additions for compatibility with old
+ {"und_419", "es_Latn_419"},
+ {"und_ZM", "bem_Latn_ZM"},
+ {"und_CC", "ms_Arab_CC"},
+ {"und_SL", "kri_Latn_SL"},
+ {"und_SS", "ar_Arab_SS"},
});
/**
@@ -630,6 +650,8 @@ enum OutputStyle {
private static final Joiner JOIN_SPACE = Joiner.on(' ');
+ private static final Joiner JOIN_LS = Joiner.on(CldrUtility.LINE_SEPARATOR);
+
private static Map generatePopulationData(Map toMaximized) {
// we are going to try a different approach.
// first gather counts for maximized values
@@ -699,8 +721,13 @@ private static Map generatePopulationData(Map to
if (script == null) {
script = LocaleScriptInfo.getScriptFromLocaleOrSupplemental(writtenLanguage);
if (script == null) {
- noPopulationData.add(writtenLanguage);
- continue;
+ LSRSource silLSR = silData.get(writtenLanguage);
+ if (silLSR != null) {
+ script = silLSR.getScript();
+ } else {
+ noPopulationData.add(writtenLanguage);
+ continue;
+ }
}
localeToScriptCache.put(writtenLanguage, script);
}
@@ -714,22 +741,7 @@ private static Map generatePopulationData(Map to
}
}
if (!noPopulationData.isEmpty()) {
- System.out.println("script data to add");
- Set stillBad = new TreeSet<>();
for (String lang : noPopulationData) {
- LSRSource silLSR = silData.get(lang);
- if (silLSR == null) {
- stillBad.add(lang);
- } else {
- System.out.println(
- " ");
- }
- }
- for (String lang : stillBad) {
System.out.println(
JOIN_TAB.join("No script in pop. data for", lang, getNameSafe(lang)));
}
@@ -1287,6 +1299,15 @@ public static String printingName(String locale, Joiner spacing) {
: english.getName(CLDRFile.TERRITORY_NAME, region)));
}
+ // Formatting constants shared by printLikelySubtags and printLine; all three are derived
+ // from OUTPUT_STYLE when this class is initialized.
+ // NOTE(review): as static initializers these read OUTPUT_STYLE at class-init time; confirm
+ // OUTPUT_STYLE is declared (and assigned) earlier in the class.
+
+ // Separator placed between the pieces of one entry: a line break for the C styles, else a tab.
+ static final String SEPARATOR =
+ OUTPUT_STYLE == OutputStyle.C || OUTPUT_STYLE == OutputStyle.C_ALT
+ ? CldrUtility.LINE_SEPARATOR
+ : "\t";
+ // Joiner passed to printingName for the name fields; null fields are rendered as "∅".
+ static final Joiner spacing =
+ Joiner.on(OUTPUT_STYLE == OutputStyle.PLAINTEXT ? "\t" : "‧").useForNull("∅");
+
+ // Arrow placed between the source name and the target name in the generated comment.
+ static final String arrow = OUTPUT_STYLE == OutputStyle.PLAINTEXT ? "\t⇒\t" : "\t➡ ";
+
private static File printLikelySubtags(Map fluffup) throws IOException {
final File genDir = new File(CLDRPaths.GEN_DIRECTORY, "supplemental");
final File genFile =
@@ -1296,45 +1317,24 @@ private static File printLikelySubtags(Map fluffup) throws IOExc
System.out.println("Writing to " + genFile);
// set based on above
- final String SEPARATOR =
- OUTPUT_STYLE == OutputStyle.C || OUTPUT_STYLE == OutputStyle.C_ALT
- ? CldrUtility.LINE_SEPARATOR
- : "\t";
- Joiner spacing =
- Joiner.on(OUTPUT_STYLE == OutputStyle.PLAINTEXT ? "\t" : " ‧ ").useForNull("∅");
-
- final String arrow = OUTPUT_STYLE == OutputStyle.PLAINTEXT ? "\t⇒\t" : "\t➡ ";
-
try (PrintWriter out = FileUtilities.openUTF8Writer(genFile)) {
String header =
OUTPUT_STYLE != OutputStyle.XML
? "const MapToMaximalSubtags default_subtags[] = {"
- : ""
- + CldrUtility.LINE_SEPARATOR
- + ""
- + CldrUtility.LINE_SEPARATOR
- + ""
- + CldrUtility.LINE_SEPARATOR
- + ""
- + CldrUtility.LINE_SEPARATOR
- + ""
- + CldrUtility.LINE_SEPARATOR
- + " "
- + CldrUtility.LINE_SEPARATOR
- + " ";
+ : JOIN_LS.join(
+ "",
+ "",
+ "",
+ "",
+ "",
+ " ",
+ " ");
String footer =
OUTPUT_STYLE != OutputStyle.XML
? SEPARATOR + "};"
@@ -1343,61 +1343,101 @@ private static File printLikelySubtags(Map fluffup) throws IOExc
+ "";
out.println(header);
boolean first = true;
- Set keys = new TreeSet<>(new LocaleStringComparator());
- keys.addAll(fluffup.keySet());
- for (String printingLocale : keys) {
- String printingTarget = fluffup.get(printingLocale);
- String comment =
- printingName(printingLocale, spacing)
- + arrow
- + printingName(printingTarget, spacing);
-
- if (OUTPUT_STYLE == OutputStyle.XML) {
- out.println(
- "\t\t"
- + "\t\t"
- + "");
- } else {
- if (first) {
- first = false;
- } else {
- out.print(",");
- }
- if (comment.length() > 70 && SEPARATOR.equals(CldrUtility.LINE_SEPARATOR)) {
- comment =
- printingName(printingLocale, spacing)
- + SEPARATOR
- + " // "
- + arrow
- + printingName(printingTarget, spacing);
+ printLine(fluffup, Map.of(), first, out);
+
+ if (OUTPUT_STYLE == OutputStyle.XML) {
+ out.println(" ");
+ }
+
+ // Now add from silData
+ // filter to only languages that are not already in
+ Map silMap = new HashMap<>();
+ Map silOrigins = new HashMap<>();
+
+ for (Entry entry : silData.entrySet()) {
+ CLDRLocale source = CLDRLocale.getInstance(entry.getKey());
+ String lang = source.getLanguage();
+ if (!fluffup.containsKey(lang)) {
+ silMap.put(entry.getKey(), entry.getValue().getLsrString());
+ if (!entry.getValue().getSources().isEmpty()) {
+ silOrigins.put(entry.getKey(), entry.getValue().getSourceString());
}
- out.print(
- " {"
- + SEPARATOR
- + " // "
- + comment
- + SEPARATOR
- + " \""
- + printingLocale
- + "\","
- + SEPARATOR
- + " \""
- + printingTarget
- + "\""
- + CldrUtility.LINE_SEPARATOR
- + " }");
}
}
+ printLine(silMap, silOrigins, first, out);
+
out.println(footer);
out.close();
}
return genFile;
}
+
+ /**
+ * Prints every entry of {@code toPrint} to {@code out}, visiting the keys in
+ * LocaleStringComparator order.
+ *
+ * In XML style each entry is written with println, and one extra line is written just before
+ * the first key that starts with "und". In the other styles each entry is written as a
+ * brace-delimited item, and a comma is written before every item except the first one when
+ * {@code first} is true.
+ *
+ * NOTE(review): {@code first} is a primitive passed by value, so clearing it here is not
+ * visible to the caller. A caller that prints two batches and passes the same local flag to
+ * both calls will get no comma between the last item of the first batch and the first item
+ * of the second batch in non-XML styles. Consider returning the updated flag.
+ *
+ * @param toPrint mapping from source locale to target locale to print
+ * @param origins per-source origin text, looked up by the same key as {@code toPrint}
+ * @param first true if no item has been printed yet (only consulted in non-XML styles)
+ * @param out destination writer; not closed by this method
+ */
+ public static void printLine(
+ Map toPrint,
+ Map origins,
+ boolean first,
+ PrintWriter out) {
+ // Copy the keys into a sorted set so output order does not depend on the map type.
+ Set keys = new TreeSet<>(new LocaleStringComparator());
+ keys.addAll(toPrint.keySet());
+ // Becomes false once the first "und..." key has been seen (XML style only).
+ boolean noUndYet = true;
+ for (String printingLocale : keys) {
+ String printingTarget = toPrint.get(printingLocale);
+ String origin = origins.get(printingLocale);
+ String comment =
+ printingName(printingLocale, spacing)
+ + arrow
+ + printingName(printingTarget, spacing);
+
+ if (OUTPUT_STYLE == OutputStyle.XML) {
+ if (noUndYet) {
+ if (printingLocale.startsWith("und")) {
+ noUndYet = false;
+ out.println(
+ " ");
+ }
+ }
+ out.println(
+ "\t\t"
+ + "\t\t"
+ + "");
+ } else {
+ if (first) {
+ first = false;
+ } else {
+ out.print(",");
+ }
+ // Long comments are split over two lines, but only when entries are line-separated.
+ if (comment.length() > 70 && SEPARATOR.equals(CldrUtility.LINE_SEPARATOR)) {
+ comment =
+ printingName(printingLocale, spacing)
+ + SEPARATOR
+ + " // "
+ + arrow
+ + printingName(printingTarget, spacing);
+ }
+ out.print(
+ " {"
+ + SEPARATOR
+ + " // "
+ + comment
+ + SEPARATOR
+ + " \""
+ + printingLocale
+ + "\","
+ + SEPARATOR
+ + " \""
+ + printingTarget
+ + "\""
+ + CldrUtility.LINE_SEPARATOR
+ + " }");
+ }
+ }
+ }
}
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/LSRSource.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/LSRSource.java
new file mode 100644
index 00000000000..1699203fcf4
--- /dev/null
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/LSRSource.java
@@ -0,0 +1,111 @@
+package org.unicode.cldr.tool;
+
+import com.google.common.base.Joiner;
+import com.google.common.base.Splitter;
+import com.google.common.collect.Comparators;
+import com.google.common.collect.ComparisonChain;
+import com.google.common.collect.ImmutableSortedSet;
+import java.util.Comparator;
+import java.util.Objects;
+import java.util.Set;
+import org.unicode.cldr.util.CLDRConfig;
+import org.unicode.cldr.util.CLDRFile;
+import org.unicode.cldr.util.CLDRLocale;
+
+/**
+ * A language/script/region target, held as a CLDRLocale, together with the sorted set of
+ * source labels it was derived from.
+ *
+ * Ordering, equality and hashing all use the locale first and then the source set, so
+ * compareTo is consistent with equals.
+ */
+public class LSRSource implements Comparable<LSRSource> {
+    private static final Joiner JOIN_SPACE = Joiner.on(' ');
+    private static final Splitter SPLIT_SPACE = Splitter.on(' ').omitEmptyStrings();
+    private final CLDRLocale cldrLocale;
+    private final Set<String> sources;
+
+    /**
+     * @param lang language subtag
+     * @param script script subtag
+     * @param region region subtag
+     * @param sources space-separated source labels; empty pieces are dropped and the rest are
+     *     stored as an immutable sorted set
+     */
+    LSRSource(String lang, String script, String region, String sources) {
+        cldrLocale = CLDRLocale.getInstance(lang, script, region);
+        this.sources = ImmutableSortedSet.copyOf(SPLIT_SPACE.splitToList(sources));
+    }
+
+    public String getLanguage() {
+        return cldrLocale.getLanguage();
+    }
+
+    public String getScript() {
+        return cldrLocale.getScript();
+    }
+
+    public String getRegion() {
+        return cldrLocale.getRegion();
+    }
+
+    /** Returns the immutable, sorted set of source labels. */
+    public Set<String> getSources() {
+        return sources;
+    }
+
+    /** Returns the locale as a string, as produced by CLDRLocale.toString(). */
+    public String getLsrString() {
+        return cldrLocale.toString();
+    }
+
+    /** Orders by locale, then lexicographically by the sorted source labels. */
+    @Override
+    public int compareTo(LSRSource other) {
+        return ComparisonChain.start()
+                .compare(cldrLocale, other.cldrLocale)
+                .compare(
+                        sources,
+                        other.sources,
+                        Comparators.lexicographical(Comparator.<String>naturalOrder()))
+                .result();
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(cldrLocale, sources);
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (obj == this) return true;
+        if (!(obj instanceof LSRSource)) return false;
+
+        LSRSource other = (LSRSource) obj;
+        return Objects.equals(cldrLocale, other.cldrLocale)
+                && Objects.equals(sources, other.sources);
+    }
+
+    @Override
+    public String toString() {
+        return cldrLocale.toString() + " // " + getSources();
+    }
+
+    /**
+     * Formats one output line for the mapping from {@code source} to this target.
+     *
+     * NOTE(review): the string literals and the two example comments in this method look
+     * truncated (markup appears to have been lost); confirm the intended text before relying
+     * on the output, and check whether {@code english} and {@code target} are meant to be
+     * used in it.
+     *
+     * @param source the source tag of the mapping
+     */
+    public String line(String source) {
+        final CLDRFile english = CLDRConfig.getInstance().getEnglish();
+
+        //
+        //
+        final String target = cldrLocale.toString();
+        final String result =
+                ""
+                        + "\t";
+        return result;
+    }
+
+    /** Returns the source labels joined by single spaces, in sorted order. */
+    public String getSourceString() {
+        return JOIN_SPACE.join(getSources());
+    }
+
+    public CLDRLocale getCldrLocale() {
+        return cldrLocale;
+    }
+}
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/LangTagsData.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/LangTagsData.java
index 7ddfbbd863f..488e9f41641 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/LangTagsData.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/LangTagsData.java
@@ -1,10 +1,10 @@
package org.unicode.cldr.tool;
import com.google.common.base.Splitter;
+import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Multimap;
import com.google.common.collect.TreeMultimap;
-import com.ibm.icu.impl.Row;
import com.ibm.icu.util.Output;
import java.io.IOException;
import java.io.UncheckedIOException;
@@ -12,6 +12,7 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Collection;
+import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@@ -21,9 +22,12 @@
import java.util.regex.Pattern;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
+import org.unicode.cldr.util.CLDRLocale;
import org.unicode.cldr.util.CLDRPaths;
+import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Iso639Data;
import org.unicode.cldr.util.Iso639Data.Type;
+import org.unicode.cldr.util.LanguageTagCanonicalizer;
import org.unicode.cldr.util.LanguageTagParser;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.Validity;
@@ -40,7 +44,7 @@ public class LangTagsData {
private final Validity validity = Validity.getInstance();
private static final CLDRConfig CLDR_CONFIG = CLDRConfig.getInstance();
- private static final CLDRFile english = CLDR_CONFIG.getEnglish();
+ static final CLDRFile english = CLDR_CONFIG.getEnglish();
private static final LangTagsData INSTANCE = new LangTagsData();
@@ -84,6 +88,7 @@ private Map readJson() {
Output lastFull = new Output<>();
Map result = new TreeMap<>();
+ LanguageTagCanonicalizer langCanoner = new LanguageTagCanonicalizer(null);
try {
Files.lines(path)
.forEach(
@@ -99,20 +104,42 @@ private Map readJson() {
lastFull.value = value;
break;
case "tag":
+ if (lastFull.value == null) {
+ break;
+ }
try {
- String fullLang =
- ltpFull.set(lastFull.value).getLanguage();
+ ltpFull.set(lastFull.value);
+ ltpTag.set(value);
if (isIllFormed(lastFull.value, ltpFull)
- || isIllFormed(value, ltpTag.set(value))) {
+ || isIllFormed(value, ltpTag)) {
processErrors.put(
Errors.Type.ill_formed_tags,
value,
lastFull.value,
"");
} else {
- String reference = SIL;
+ final String fixedTag =
+ langCanoner.transform(value);
+ final String fixedFull =
+ langCanoner.transform(lastFull.value);
+ if (!fixedTag.equals(value)
+ || !fixedFull.equals(lastFull.value)) {
+ processErrors.put(
+ Errors.Type.canonicalizing,
+ value,
+ lastFull.value,
+ "mapped to: "
+ + fixedTag
+ + " ➡ "
+ + fixedFull);
+ ltpTag.set(fixedTag);
+ ltpFull.set(fixedFull);
+ }
+ String fullLang = ltpFull.getLanguage();
final String fullScript = ltpFull.getScript();
String fullRegion = ltpFull.getRegion();
+
+ String reference = SIL;
if (fullRegion.equals("ZZ")
|| fullRegion.equals("001")) {
Collection tempRegions =
@@ -125,11 +152,18 @@ private Map readJson() {
}
}
- String tagLang = ltpTag.getLanguage();
- String tagScript = ltpTag.getScript();
- String tagRegion = ltpTag.getRegion();
+ final String tagLang = ltpTag.getLanguage();
+ final String tagScript = ltpTag.getScript();
+ final String tagRegion = ltpTag.getRegion();
- if (!tagLang.equals(fullLang)
+ if (!tagScript.isEmpty()
+ && !tagRegion.isEmpty()) {
+ processErrors.put(
+ Errors.Type.tag_is_full,
+ value,
+ lastFull.value,
+ "");
+ } else if (!tagLang.equals(fullLang)
|| (!tagScript.isEmpty()
&& !tagScript.equals(
fullScript))
@@ -149,7 +183,7 @@ private Map readJson() {
errors)) {
add(
result,
- value,
+ fixedTag,
fullLang,
fullScript,
fullRegion,
@@ -176,7 +210,55 @@ private Map readJson() {
}
}
});
- return result;
+
+ // check for items that need context
+
+            // Collect the keys to drop first; removing from result while iterating over its
+            // entry set would invalidate the iterator.
+            Set<String> toRemove = new LinkedHashSet<>();
+            for (Entry<String, LSRSource> entry : result.entrySet()) {
+                // if we have lang_script or lang_region, we must have lang
+                final String source = entry.getKey();
+                if (!source.contains("_")) {
+                    continue;
+                }
+                // we have either aaa_Dddd or aaa_EE (we know the source can't have 3 fields)
+                final String language = CLDRLocale.getInstance(source).getLanguage();
+                final LSRSource fullForLanguage = result.get(language);
+                if (fullForLanguage == null) {
+                    toRemove.add(source);
+                    processErrors.put(
+                            Errors.Type.language_of_tag_missing,
+                            source,
+                            entry.getValue().getLsrString(),
+                            "but no mapping for " + language);
+                    continue;
+                }
+                final CLDRLocale targetForLanguage = fullForLanguage.getCldrLocale();
+                final CLDRLocale target = entry.getValue().getCldrLocale();
+                // The field that the source leaves out must not simply repeat what the
+                // language-only mapping already supplies. That is, given
+                //   aaa => aaa_Bbbb_CC
+                // the following add nothing and are dropped:
+                //   aaa_Dddd => aaa_Dddd_CC   (same region as the language-only target)
+                //   aaa_EE   => aaa_Bbbb_EE   (same script as the language-only target)
+                // Comparing languages here would always match, because both targets are
+                // reached through the same language, so compare region and script.
+                if (target.getRegion().equals(targetForLanguage.getRegion())
+                        || target.getScript().equals(targetForLanguage.getScript())) {
+                    toRemove.add(source);
+                    processErrors.put(
+                            Errors.Type.redundant_mapping,
+                            source,
+                            entry.getValue().getLsrString(),
+                            "because: " + language + " ➡ " + targetForLanguage);
+                }
+            }
+            result.keySet().removeAll(toRemove);
+
+ // protect the results
+
+ processErrors.data = CldrUtility.protectCollection(processErrors.data);
+ return CldrUtility.protectCollection(result);
} catch (IOException ex) {
throw new UncheckedIOException(ex);
}
@@ -235,7 +317,7 @@ private Multimap readWikidata() {
} catch (IOException ex) {
throw new UncheckedIOException(ex);
}
- return result;
+ return ImmutableMultimap.copyOf(result);
}
private void add(
@@ -254,13 +336,18 @@ private void add(
result.put(source, newVersion);
}
- private static class Errors {
+ public static class Errors {
public enum Type {
ill_formed_tags("Ill-formed tags"),
already_CLDR("Language already in CLDR"),
tag_not_in_full("tag ⊄ full"),
exception("exception"),
- skipping_scope("Skipping scope, SIL");
+ skipping_scope("Skipping scope, SIL"),
+ tag_is_full("Tag must not have both script and region"),
+ language_of_tag_missing("Missing tag for just the language"),
+ redundant_mapping(
+ "aaa => aaa_Bbbb_CC makes redundant aaa_Dddd => aaa_Dddd_CC & aaa_EE => aaa_Bbbb_EE"),
+ canonicalizing("either the source or target are not canonical");
private final String printable;
@@ -269,7 +356,11 @@ private Type(String printable) {
}
}
- public Multimap data = TreeMultimap.create();
+ private Multimap data = TreeMultimap.create();
+
+ /** Returns the multimap of recorded errors; this is the {@code data} field itself, not a copy. */
+ public Multimap getData() {
+ return data;
+ }
public void put(
Type illFormedTags, String tagValue, String fullValue, String errorMessage) {
@@ -293,63 +384,4 @@ public void printAll() {
}
}
}
-
- static class LSRSource implements Comparable {
- final Row.R4 data;
-
- LSRSource(String lang, String script, String region, String source) {
- if (script.contains("Soyo") || region.contains("Soyo")) {
- int debug = 0;
- }
- data = Row.of(lang, script, region, source);
- data.freeze();
- }
-
- @Override
- public String toString() {
- return combineLSR(data.get0(), data.get1(), data.get2()) + " // " + data.get3();
- }
-
- @Override
- public int compareTo(LSRSource o) {
- return data.compareTo(o.data);
- }
-
- @Override
- public int hashCode() {
- return data.hashCode();
- }
-
- @Override
- public boolean equals(Object obj) {
- return data.equals(obj);
- }
-
- public String line(String source) {
- // TODO Auto-generated method stub
- //
- //
- final String target = combineLSR(data.get0(), data.get1(), data.get2());
- final String origin = data.get3();
- final String result =
- ""
- + "\t";
- return result;
- }
-
- public static String combineLSR(String lang, String script, String region) {
- return lang
- + (script.isEmpty() ? "" : "_" + script)
- + (region.isEmpty() ? "" : "_" + region);
- }
- }
}
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRLocale.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRLocale.java
index 37a59e943f0..5bfdf3d0472 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRLocale.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRLocale.java
@@ -672,4 +672,16 @@ public int getRank() {
return 1 + getParent().getRank();
}
}
+
+    /**
+     * Builds the locale for separately supplied language, script and region subtags by
+     * assembling an underscore-separated identifier and delegating to getInstance(String).
+     * An empty language, or the language "root", is written as "und"; an empty script or
+     * region is left out.
+     *
+     * Non-optimized version for now.
+     */
+    public static CLDRLocale getInstance(String lang, String script, String region) {
+        final StringBuilder localeId = new StringBuilder();
+        if (lang.isEmpty() || lang.equals("root")) {
+            localeId.append("und");
+        } else {
+            localeId.append(lang);
+        }
+        if (!script.isEmpty()) {
+            localeId.append("_").append(script);
+        }
+        if (!region.isEmpty()) {
+            localeId.append("_").append(region);
+        }
+        return getInstance(localeId.toString());
+    }
+
+ /** Returns the region subtag; this simply delegates to getCountry(). */
+ public String getRegion() {
+ return getCountry();
+ }
}
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/LanguageTagCanonicalizer.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/LanguageTagCanonicalizer.java
index a56ba239a5b..1c2c171a01c 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/LanguageTagCanonicalizer.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/LanguageTagCanonicalizer.java
@@ -29,24 +29,35 @@ public class LanguageTagCanonicalizer implements StringTransform {
private final LanguageTagParser ltp1 = new LanguageTagParser();
private final LanguageTagParser ltp2 = new LanguageTagParser();
+ /** Use a parameter to specify LIKELY_FAVOR_SCRIPT or no minimization */
public LanguageTagCanonicalizer() {
this(LstrType.script);
}
+ @Deprecated
public LanguageTagCanonicalizer(boolean favorRegion) {
this(favorRegion ? LstrType.region : LstrType.script);
}
- public LanguageTagCanonicalizer(LstrType lstrType) {
- switch (lstrType) {
- case region:
- likely = LIKELY_FAVOR_REGION;
- break;
- case script:
- likely = LIKELY_FAVOR_SCRIPT;
- break;
- default:
- likely = null;
+ /**
+ * Choose the style of minimization, or null for none.
+ *
+ * @param minimizationTypeOrNull LstrType.region selects LIKELY_FAVOR_REGION and
+ *     LstrType.script selects LIKELY_FAVOR_SCRIPT; null, or any other LstrType value,
+ *     leaves {@code likely} null so that no minimization is done
+ */
+ public LanguageTagCanonicalizer(LstrType minimizationTypeOrNull) {
+ // Check for null before the switch: switching on a null enum reference would throw.
+ if (minimizationTypeOrNull == null) {
+ likely = null; // don't minimize.
+ } else {
+ switch (minimizationTypeOrNull) {
+ case region:
+ likely = LIKELY_FAVOR_REGION;
+ break;
+ case script:
+ likely = LIKELY_FAVOR_SCRIPT;
+ break;
+ default:
+ likely = null;
+ }
}
}
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/LocaleScriptInfo.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/LocaleScriptInfo.java
index d5c30317235..a22429a3c86 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/LocaleScriptInfo.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/LocaleScriptInfo.java
@@ -70,6 +70,7 @@ public static String getScriptFromLocale(String locale) {
/**
* Get the script code (aka short property name, like Latn) from the supplemental languageData.
+ * Take the first one if there are 2.
*
* @param locale
* @return null if fails
@@ -82,10 +83,7 @@ public static String getScriptFromSupplementalData(String locale) {
for (BasicLanguageData datum : basicLanguageData.values()) {
final Set scripts = datum.getScripts();
boolean isPrimary = datum.getType() == BasicLanguageData.Type.primary;
- if (scripts.size() != 1) {
- if (scripts.size() > 1 && isPrimary) {
- break;
- }
+ if (scripts.isEmpty()) {
continue;
}
String script = scripts.iterator().next();
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/SupplementalDataInfo.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/SupplementalDataInfo.java
index 0be181a193c..a1491b31f63 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/SupplementalDataInfo.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/SupplementalDataInfo.java
@@ -451,7 +451,7 @@ public BasicLanguageData addScript(String script) {
throw new IllegalArgumentException("Illegal Script: " + script);
}
if (scripts == Collections.EMPTY_SET) {
- scripts = new TreeSet<>();
+ scripts = new LinkedHashSet<>(); // retain order
}
scripts.add(script);
return this;