diff --git a/common/dtd/ldml.dtd b/common/dtd/ldml.dtd
index 984e03305a5..e02fff900ca 100644
--- a/common/dtd/ldml.dtd
+++ b/common/dtd/ldml.dtd
@@ -61,7 +61,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic
-
+
@@ -95,7 +95,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic
-
+
diff --git a/common/dtd/ldmlSupplemental.dtd b/common/dtd/ldmlSupplemental.dtd
index 0ce28c9755e..237fb957063 100644
--- a/common/dtd/ldmlSupplemental.dtd
+++ b/common/dtd/ldmlSupplemental.dtd
@@ -65,7 +65,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic
-
+
@@ -113,7 +113,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic
-
+
@@ -284,7 +284,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic
-
+
@@ -297,7 +297,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic
-
+
@@ -702,7 +702,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic
-
+
@@ -711,7 +711,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic
-
+
@@ -720,9 +720,9 @@ CLDR data files are interpreted according to the LDML specification (http://unic
-
+
-
+
@@ -738,7 +738,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic
-
+
@@ -914,7 +914,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic
-
+
@@ -962,9 +962,9 @@ CLDR data files are interpreted according to the LDML specification (http://unic
-
+
-
+
@@ -996,7 +996,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic
-
+
diff --git a/common/main/en.xml b/common/main/en.xml
index 70918a060d0..29542a24576 100644
--- a/common/main/en.xml
+++ b/common/main/en.xml
@@ -139,6 +139,7 @@ annotations.
Coptic
Capiznon
Cree
+ Woods Cree
Michif
Crimean Tatar
Southern East Cree
@@ -152,7 +153,6 @@ annotations.
Swampy Cree
Church Slavic
Chuvash
- Woods Cree
Welsh
Danish
Dakota
@@ -256,7 +256,6 @@ annotations.
Hakka Chinese
Hawaiian
Southern Haida
- Northern Haida
Hebrew
Hindi
Hindi (Latin)
@@ -284,7 +283,6 @@ annotations.
Igbo
Sichuan Yi
Inupiaq
- Eastern Canadian Inuktitut
Western Canadian Inuktitut
Iloko
Ingush
@@ -474,7 +472,6 @@ annotations.
Ojibwa
Northwestern Ojibwa
Central Ojibwa
- Eastern Ojibwa
Oji-Cree
Western Ojibwa
Okanagan
diff --git a/common/main/fi.xml b/common/main/fi.xml
index c56be83d76c..7bbe4e1dec6 100644
--- a/common/main/fi.xml
+++ b/common/main/fi.xml
@@ -31,7 +31,7 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/
afrihili
aghem
ainu
- urduni
+ urduni
akan
akkadi
alabama
diff --git a/common/main/la.xml b/common/main/la.xml
index ced058291d1..40a5e6ff4f8 100644
--- a/common/main/la.xml
+++ b/common/main/la.xml
@@ -24,7 +24,6 @@ CLDR data files are interpreted according to the LDML specification (http://unic
Atropatenica
Ruthenica Alba
Bulgarica
- Bihari
Bengalica
Tibetana
Britonica
@@ -66,12 +65,12 @@ CLDR data files are interpreted according to the LDML specification (http://unic
Interlingua
Interlingue
Igbonica
- Indonesia
+ Indonesia
Islandica
Italiana
- Hebraica
+ Hebraica
Iaponica
- Iudaeogermanica
+ Iudaeogermanica
Iavensis
Georgiana
Cazachica
@@ -213,7 +212,6 @@ CLDR data files are interpreted according to the LDML specification (http://unic
Brasilia
Insulae Bahamenses
Butania
- Birmania
Insula Bouvet
Botswana
Ruthenia Alba
@@ -237,7 +235,6 @@ CLDR data files are interpreted according to the LDML specification (http://unic
Insula Christi Natalis
Cyprus
Cechia
- Res publica Democratica Germanica
Germania
Gibutum
Dania
@@ -421,7 +418,6 @@ CLDR data files are interpreted according to the LDML specification (http://unic
Kosovia
Iemenia
Maiotta
- Iugoslavia
Africa Australis
Zambia
Zimbabua
diff --git a/common/main/nl.xml b/common/main/nl.xml
index b73d6893e20..a8e9fa1d1ba 100644
--- a/common/main/nl.xml
+++ b/common/main/nl.xml
@@ -31,7 +31,6 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/
Afrihili
Aghem
Aino
- Zuid-Levantijns-Arabisch
Akan
Akkadisch
Alabama
diff --git a/common/supplemental/supplementalData.xml b/common/supplemental/supplementalData.xml
index b963f1466e6..58893bc9318 100644
--- a/common/supplemental/supplementalData.xml
+++ b/common/supplemental/supplementalData.xml
@@ -4930,7 +4930,7 @@ XXX Code for transations where no currency is involved
-
+
diff --git a/common/supplemental/supplementalMetadata.xml b/common/supplemental/supplementalMetadata.xml
index 7b3f94549a4..d8e5052adef 100644
--- a/common/supplemental/supplementalMetadata.xml
+++ b/common/supplemental/supplementalMetadata.xml
@@ -179,7 +179,7 @@ For terms of use, see http://www.unicode.org/copyright.html
-
+
@@ -306,6 +306,9 @@ For terms of use, see http://www.unicode.org/copyright.html
+
+
+
diff --git a/common/validity/language.xml b/common/validity/language.xml
index bfe60a517f1..912dac91d86 100644
--- a/common/validity/language.xml
+++ b/common/validity/language.xml
@@ -76,7 +76,7 @@
cia~e cih cik cim~n cip cir ciw ciy
cja cje cjh~i cjk cjm~p cjs cjv cjy
ckb ckh ckl~o ckq~v ckx~z
- cla clc cle clh~m clo cls~u clw cly
+ cla clc cle clh~m clo clt~u clw cly
cma cmc cme cmg cmi cml~m cmo cmr~t
cna~c cng~i cnk~l cno~q cns~u cnw~x
co coa~h coj~q cot~x coz
@@ -628,7 +628,7 @@
aam adp agp ais ajp ajt~u als aoh arb asd aue ayr ayx~y azj
baz bbz bcc bcl bgm bh bhk bic bij bjd bjq bkb blg bmy bpb btb btl bxk bxr bxx byy
- cbe cbh cca ccq cdg cjr cka cld cmk cmn cnr coy cqu cug cum cwd
+ cbe cbh cca ccq cdg cjr cka cld cls cmk cmn cnr coy cqu cug cum cwd
daf dap dgo dgu dha dhd dik diq dit djl dkl drh drr drw dud duj dwl
ekc ekk elp emk emo esk
fat fuc
diff --git a/exemplars/main/rna.xml b/exemplars/main/rna.xml
deleted file mode 100644
index de04a86b277..00000000000
--- a/exemplars/main/rna.xml
+++ /dev/null
@@ -1,23 +0,0 @@
-
-
-
-
-
-
-
-
-
-
- left-to-right
- top-to-bottom
-
-
-
- [a b c d e f g h i j k m n o p r s t u v w y z]
- [l q x]
-
-
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/MatchValue.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/MatchValue.java
index 973186181c6..c0c9f17357e 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/MatchValue.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/MatchValue.java
@@ -4,7 +4,10 @@
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Multimap;
+import com.google.common.collect.TreeMultimap;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R2;
@@ -17,6 +20,7 @@
import com.vdurmont.semver4j.Semver.SemverType;
import com.vdurmont.semver4j.SemverException;
import java.text.ParseException;
+import java.util.Collections;
import java.util.Date;
import java.util.EnumSet;
import java.util.HashMap;
@@ -104,8 +108,9 @@ public static MatchValue of(String command) {
throw new IllegalArgumentException(
"Illegal/Unimplemented match type: " + originalArg);
}
+ // check for errors in the MatchValue functions
if (!originalArg.equals(result.getName())) {
- System.err.println(
+ throw new IllegalArgumentException(
"Non-standard form or error: " + originalArg + " ==> " + result.getName());
}
return result;
@@ -157,14 +162,22 @@ public static class LocaleMatchValue extends MatchValue {
private final Predicate variant;
+ /** Creates a matcher in which every subtag type falls back to its default status set. */
public LocaleMatchValue() {
- this(null);
+ this(null, null, null, null); // use default status
}
+ /** Creates a matcher that applies the same status set to language, script, region and variant. */
public LocaleMatchValue(Set statuses) {
- lang = new ValidityMatchValue(LstrType.language, statuses, false);
- script = new ValidityMatchValue(LstrType.script, statuses, false);
- region = new ValidityMatchValue(LstrType.region, statuses, false);
- variant = new ValidityMatchValue(LstrType.variant, statuses, false);
+ this(statuses, statuses, statuses, statuses);
+ }
+
+ /**
+ * Creates a matcher with a separate status set per subtag type. Each argument is handed to
+ * the ValidityMatchValue constructor unchanged; a null argument makes that constructor use
+ * DEFAULT_STATUS for the corresponding type.
+ */
+ public LocaleMatchValue(
+ Set langStatus,
+ Set scriptStatus,
+ Set regionStatus,
+ Set variantStatus) {
+ lang = new ValidityMatchValue(LstrType.language, langStatus, false);
+ script = new ValidityMatchValue(LstrType.script, scriptStatus, false);
+ region = new ValidityMatchValue(LstrType.region, regionStatus, false);
+ variant = new ValidityMatchValue(LstrType.variant, variantStatus, false);
}
@Override
@@ -174,8 +187,11 @@ public String getName() {
@Override
public boolean is(String item) {
+ if (item.equals("root")) {
+ item = "und";
+ }
if (!item.contains("_")) {
- return lang.is(item);
+ return checkLang(item);
}
LanguageTagParser ltp;
try {
@@ -183,7 +199,7 @@ public boolean is(String item) {
} catch (Exception e) {
return false;
}
- return lang.is(ltp.getLanguage())
+ return checkLang(ltp.getLanguage())
&& (ltp.getScript().isEmpty() || script.is(ltp.getScript()))
&& (ltp.getRegion().isEmpty() || region.is(ltp.getRegion()))
&& (ltp.getVariants().isEmpty() || and(variant, ltp.getVariants()))
@@ -191,12 +207,54 @@ public boolean is(String item) {
&& ltp.getLocaleExtensions().isEmpty();
}
+ /**
+ * Returns whether the language subtag is acceptable. The base implementation defers to the
+ * language validity matcher; it is a separate method so subclasses can accept extra codes.
+ */
+ public boolean checkLang(String language) {
+ return lang.is(language);
+ }
+
@Override
public String getSample() {
return "de";
}
}
+    /**
+     * Locale matcher used for likelySubtags data. It accepts every language the plain
+     * {@link LocaleMatchValue} accepts, plus the backwards-compatible exception codes "in", "iw",
+     * "ji", "jw", "mo" and "tl", which are retained to support variants.
+     */
+    public static class XLocaleMatchValue extends LocaleMatchValue {
+        static final Set exceptions = Set.of("in", "iw", "ji", "jw", "mo", "tl");
+
+        @Override
+        public String getName() {
+            return "validity/locale-for-likely";
+        }
+
+        /** The normal language check runs first; only on failure is the exception list used. */
+        @Override
+        public boolean checkLang(String language) {
+            if (super.checkLang(language)) {
+                return true;
+            }
+            return exceptions.contains(language);
+        }
+    }
+
+    /**
+     * Locale matcher used for language names. It accepts every language the plain
+     * {@link LocaleMatchValue} accepts, plus the backwards-compatible exception codes "fat",
+     * "sh", "tl" and "tw".
+     */
+    public static class NLocaleMatchValue extends LocaleMatchValue {
+        static final Set exceptions = Set.of("fat", "sh", "tl", "tw");
+
+        @Override
+        public String getName() {
+            return "validity/locale-for-names";
+        }
+
+        /** Evaluates the regular check first, then falls back to the exception list. */
+        @Override
+        public boolean checkLang(String language) {
+            final boolean normallyValid = super.checkLang(language);
+            return normallyValid || exceptions.contains(language);
+        }
+    }
+
// TODO remove these if possible — ticket/10120
static final Set SCRIPT_HACK =
ImmutableSet.of(
@@ -253,6 +311,9 @@ public static EnumParser of(Class aClass) {
}
public Set parse(String text) {
+ if (text == null) {
+ return null;
+ }
Set statuses = EnumSet.noneOf(aClass);
boolean negative = text.startsWith("!");
if (negative) {
@@ -293,18 +354,59 @@ public boolean isAll(Set statuses) {
}
public static class ValidityMatchValue extends MatchValue {
+ private static final Validity VALIDITY = Validity.getInstance();
+ /**
+ * Default status set per LstrType. The ValidityMatchValue constructor uses the entry for its
+ * type when it is given no explicit statuses, and getName() omits the status suffix when
+ * the statuses equal this default.
+ */
+ public static final Multimap DEFAULT_STATUS;
+
+ static {
+ // Build into a mutable multimap first, then publish an immutable copy below.
+ Multimap DEFAULT_STATUS_ = TreeMultimap.create();
+ for (LstrType lstrType : LstrType.values()) {
+ switch (lstrType) {
+ case region:
+ // regions additionally allow macroregion and special
+ DEFAULT_STATUS_.putAll(
+ lstrType,
+ Set.of(
+ Status.regular,
+ Status.unknown,
+ Status.macroregion,
+ Status.special));
+ break;
+ case language:
+ case script:
+ // languages and scripts additionally allow special
+ DEFAULT_STATUS_.putAll(
+ lstrType, Set.of(Status.regular, Status.unknown, Status.special));
+ break;
+ case subdivision:
+ case currency:
+ // subdivisions and currencies additionally allow deprecated
+ DEFAULT_STATUS_.putAll(
+ lstrType,
+ Set.of(Status.regular, Status.unknown, Status.deprecated));
+ break;
+ default:
+ // every other type: regular and unknown only
+ DEFAULT_STATUS_.putAll(lstrType, Set.of(Status.regular, Status.unknown));
+ break;
+ }
+ }
+ DEFAULT_STATUS = ImmutableMultimap.copyOf(DEFAULT_STATUS_);
+ }
+
+ private static Map shortCodeToStatus;
+ private static final EnumParser validityStatusParser = EnumParser.of(Status.class);
+
private final LstrType type;
private final boolean shortId;
private final Set statuses;
- private static Map shortCodeToStatus;
- private static final EnumParser enumParser = EnumParser.of(Status.class);
+ /**
+ * Returns the canonical name of this matcher: "validity/", an optional "short-" prefix, the
+ * type, and a status suffix. The suffix is empty when the statuses equal the defaults for
+ * the type, "/all" when they equal the key set of the validity status-to-codes map for the
+ * type, and otherwise "/" followed by the formatted status set.
+ */
@Override
public String getName() {
return "validity/"
+ (shortId ? "short-" : "")
+ type.toString()
- + (enumParser.isAll(statuses) ? "" : "/" + enumParser.format(statuses));
+ + (statuses.equals(Set.copyOf(DEFAULT_STATUS.get(type)))
+ ? ""
+ : statuses.equals(VALIDITY.getStatusToCodes(type).keySet())
+ ? "/all"
+ : "/" + validityStatusParser.format(statuses));
}
private ValidityMatchValue(LstrType type) {
@@ -317,21 +419,28 @@ private ValidityMatchValue(LstrType type, Set statuses, boolean shortId)
throw new IllegalArgumentException("short- not supported except for units");
}
this.shortId = shortId;
+ // validForType = Validity.getInstance().getStatusToCodes(type).keySet();
this.statuses =
- statuses == null ? EnumSet.allOf(Status.class) : ImmutableSet.copyOf(statuses);
+ ImmutableSet.copyOf(statuses == null ? DEFAULT_STATUS.get(type) : statuses);
}
public static MatchValue of(String typeName) {
if (typeName.equals("locale")) {
return new LocaleMatchValue();
}
+ if (typeName.equals("locale-for-likely")) {
+ return new XLocaleMatchValue();
+ }
+ if (typeName.equals("locale-for-names")) {
+ return new NLocaleMatchValue();
+ }
if (typeName.equals("bcp47-wellformed")) {
return new BCP47LocaleWellFormedMatchValue();
}
+ String statusPart = null;
int slashPos = typeName.indexOf('/');
- Set statuses = null;
if (slashPos > 0) {
- statuses = enumParser.parse(typeName.substring(slashPos + 1));
+ statusPart = typeName.substring(slashPos + 1);
typeName = typeName.substring(0, slashPos);
}
boolean shortId = typeName.startsWith("short-");
@@ -339,6 +448,10 @@ public static MatchValue of(String typeName) {
typeName = typeName.substring(6);
}
LstrType type = LstrType.fromString(typeName);
+ Set statuses =
+ "all".equals(statusPart)
+ ? VALIDITY.getStatusToCodes(type).keySet()
+ : validityStatusParser.parse(statusPart);
return new ValidityMatchValue(type, statuses, shortId);
}
@@ -366,9 +479,7 @@ public boolean is(String item) {
== null) { // lazy evaluation to avoid circular dependencies
Map _shortCodeToStatus = new TreeMap<>();
for (Entry entry :
- Validity.getInstance()
- .getCodeToStatus(LstrType.unit)
- .entrySet()) {
+ VALIDITY.getCodeToStatus(LstrType.unit).entrySet()) {
String key = entry.getKey();
Status status = entry.getValue();
final String shortKey = key.substring(key.indexOf('-') + 1);
@@ -389,13 +500,13 @@ public boolean is(String item) {
default:
break;
}
- final Status status = Validity.getInstance().getCodeToStatus(type).get(item);
+ final Status status = VALIDITY.getCodeToStatus(type).get(item);
return status != null && statuses.contains(status);
}
@Override
public String getSample() {
- return Validity.getInstance().getCodeToStatus(type).keySet().iterator().next();
+ return VALIDITY.getCodeToStatus(type).keySet().iterator().next();
}
}
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/SupplementalDataInfo.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/SupplementalDataInfo.java
index 2caae47d1b2..a298de5c959 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/SupplementalDataInfo.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/SupplementalDataInfo.java
@@ -1312,7 +1312,7 @@ private void makeStuffSafe() {
if (unitAliases != null) { // don't load unless the information is there (for old releases);
unitConverter.addAliases(unitAliases);
}
- unitConverter.freeze();
+ unitConverter.freeze(new File(directory, "../validity").toString());
rationalParser.freeze();
unitPreferences.freeze();
@@ -2166,8 +2166,13 @@ private boolean handleMetadata(String level2, String value, XPathValue parts) {
}
return true;
} else if (level3.equals("attributeValues")) {
- AttributeValidityInfo.add(
- parts.getAttributes(-1), value, attributeValidityInfo);
+ // the keyboard directory disappeared in new versions.
+ // supplementalData/metadata/validity/attributeValues[@dtds="keyboard"][@elements="keyMap"][@attributes="modifiers"][@type="TODO"]
+ final String dtdsValue = parts.getAttributeValue(-1, "dtds");
+ if (!"keyboard".equals(dtdsValue) && !"platform".equals(dtdsValue)) {
+ AttributeValidityInfo.add(
+ parts.getAttributes(-1), value, attributeValidityInfo);
+ }
return true;
}
} else if (level2.equals("serialElements")) {
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnitConverter.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnitConverter.java
index 789350e075d..482dff86b1c 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnitConverter.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnitConverter.java
@@ -168,6 +168,10 @@ public boolean isFrozen() {
+ /** Freezes this converter using the default validity directory, CLDRPaths.VALIDITY_DIRECTORY. */
@Override
public UnitConverter freeze() {
+ return freeze(CLDRPaths.VALIDITY_DIRECTORY);
+ }
+
+ public UnitConverter freeze(String validityDirectory) {
if (!frozen) {
frozen = true;
rationalParser.freeze();
@@ -185,7 +189,7 @@ public UnitConverter freeze() {
baseUnits = builder.build();
targetInfoComparator = new TargetInfoComparator();
- buildMapComparators();
+ buildMapComparators(validityDirectory);
// must be after building comparators
idToUnitId = ImmutableMap.copyOf(buildIdToUnitId());
@@ -194,14 +198,19 @@ public UnitConverter freeze() {
}
+ /** Builds the comparators using the default validity directory, CLDRPaths.VALIDITY_DIRECTORY. */
public void buildMapComparators() {
+ buildMapComparators(CLDRPaths.VALIDITY_DIRECTORY);
+ }
+
+ public void buildMapComparators(String validityDirectory) {
Set> all = new TreeSet<>();
+ final Validity validity = Validity.getInstance(validityDirectory);
Set baseSeen = new HashSet<>();
+
if (DEBUG) {
UnitParser up = new UnitParser(componentTypeData);
Output uict = new Output<>();
- for (String longUnit :
- Validity.getInstance().getStatusToCodes(LstrType.unit).get(Status.regular)) {
+ for (String longUnit : validity.getStatusToCodes(LstrType.unit).get(Status.regular)) {
String shortUnit = getShortId(longUnit);
up.set(shortUnit);
List items = new ArrayList<>();
@@ -219,8 +228,7 @@ public void buildMapComparators() {
System.out.println(shortUnit + "\t" + Joiner.on('\t').join(items));
}
}
- for (String longUnit :
- Validity.getInstance().getStatusToCodes(LstrType.unit).get(Status.regular)) {
+ for (String longUnit : validity.getStatusToCodes(LstrType.unit).get(Status.regular)) {
Output base = new Output<>();
String shortUnit = getShortId(longUnit);
ConversionInfo conversionInfo = parseUnitId(shortUnit, base, false);
@@ -232,18 +240,23 @@ public void buildMapComparators() {
conversionInfo = parseUnitId("kelvin", base, false);
}
}
- String quantity;
+ String quantity = null;
Integer quantityNumericOrder = null;
try {
quantity = getQuantityFromUnit(base.value, false);
+ if (quantity == null && "beaufort".equals(shortUnit)) {
+ quantity = "speed";
+ }
quantityNumericOrder = quantityComparator.getNumericOrder(quantity);
} catch (Exception e) {
System.out.println(
- "Failed "
+ "Failed to build unit comparator for "
+ shortUnit
+ ", "
+ base
+ ", "
+ + quantity
+ + ", "
+ quantityNumericOrder
+ ", "
+ e);
@@ -284,7 +297,11 @@ public void buildMapComparators() {
"Add new unitSystem to a grouping: " + sortingSystem);
}
R4 sortKey =
- Row.of(quantityNumericOrder, sortingSystem, conversionInfo.factor, shortUnit);
+ Row.of(
+ quantityNumericOrder,
+ sortingSystem,
+ conversionInfo == null ? Rational.INFINITY : conversionInfo.factor,
+ shortUnit);
all.add(sortKey);
}
LongUnitIdOrder.setErrorOnMissing(true);
@@ -1882,16 +1899,16 @@ public BiMap getBaseUnitToQuantity() {
return (BiMap) baseUnitToQuantity;
}
+ /**
+ * Returns the quantity for the given unit, or null if the unit can't be parsed or parsing
+ * yields no base unit. Parse failures are swallowed here without any logging.
+ */
public String getQuantityFromUnit(String unit, boolean showYourWork) {
Output metricUnit = new Output<>();
unit = fixDenormalized(unit);
try {
- ConversionInfo unitInfo = parseUnitId(unit, metricUnit, showYourWork);
- return metricUnit.value == null ? null : getQuantityFromBaseUnit(metricUnit.value);
+ // called only to fill in metricUnit; the returned ConversionInfo is not needed
+ parseUnitId(unit, metricUnit, showYourWork);
} catch (Exception e) {
- System.out.println("Failed with " + unit + ", " + metricUnit + "\t" + e);
return null;
}
+ return metricUnit.value == null ? null : getQuantityFromBaseUnit(metricUnit.value);
}
public String getQuantityFromBaseUnit(String baseUnit) {
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/Validity.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/Validity.java
index 358fc099df8..ca56301fc01 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/Validity.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/Validity.java
@@ -2,7 +2,9 @@
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableSet;
+import com.ibm.icu.util.ICUUncheckedIOException;
import java.io.File;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.EnumMap;
import java.util.LinkedHashMap;
@@ -38,6 +40,11 @@ public static Validity getInstance() {
}
public static Validity getInstance(String validityDirectory) {
+ try {
+ validityDirectory = new File(validityDirectory).getCanonicalFile().toString();
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
Validity result = cache.get(validityDirectory);
if (result == null) {
final Validity value = new Validity(validityDirectory);
@@ -79,7 +86,7 @@ private Validity(String validityDirectory) {
codeToStatus.put(type, subCodeToStatus = new LinkedHashMap<>());
}
- XMLFileReader.loadPathValues(basePath + file, lineData, true);
+ XMLFileReader.loadPathValues(new File(basePath, file).toString(), lineData, true);
for (Pair item : lineData) {
XPathValue parts = SimpleXPathParts.getFrozenInstance(item.getFirst());
if (!"id".equals(parts.getElement(-1))) {
diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestAttributeValues.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestAttributeValues.java
index a288cafb6b5..e2bf827b68c 100644
--- a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestAttributeValues.java
+++ b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestAttributeValues.java
@@ -92,7 +92,7 @@ public void TestValid() {
addXMLFiles(dtdType, mainDirs + stringDir, files);
if (isVerbose())
synchronized (pathChecker.testLog) {
- warnln(mainDirs + stringDir);
+ logln(mainDirs + stringDir);
}
}
Stream stream = SERIAL ? files.stream() : files.parallelStream();
@@ -102,7 +102,7 @@ public void TestValid() {
// checkFile(pathChecker, file);
// }
}
- pathChecker.show(isVerbose(), showStatuses);
+ pathChecker.show(dtdType, isVerbose(), showStatuses);
}
// List localesToTest = Arrays.asList("en", "root"); // , "zh", "hi", "ja",
// "ru", "cy"
@@ -145,9 +145,9 @@ private void addXMLFiles(DtdType dtdType, String path, Set files) {
} else {
for (String file : dirFile.list()) {
String localeID = file.replace(".xml", "");
- if (StandardCodes.isLocaleAtLeastBasic(localeID)) {
- addXMLFiles(dtdType, path + "/" + file, files);
- }
+ // if (StandardCodes.isLocaleAtLeastBasic(localeID)) {
+ addXMLFiles(dtdType, path + "/" + file, files);
+ // }
}
}
}
@@ -186,7 +186,8 @@ private void checkFile(PathChecker pathChecker, String fullFile) {
++_attributeCount;
String attribute = r.getAttributeLocalName(i);
String attributeValue = r.getAttributeValue(i);
- pathChecker.checkAttribute(element, attribute, attributeValue);
+ pathChecker.checkAttribute(
+ fullFile, element, attribute, attributeValue);
}
break;
}
@@ -237,7 +238,7 @@ public PathChecker(TestFmwk testLog, DtdData dtdData) {
matchValues = ImmutableMap.copyOf(_matchValues);
}
- private void checkPath(String path) {
+ private void checkPath(String fullFile, String path) {
if (seen.contains(path)) {
return;
}
@@ -251,19 +252,20 @@ private void checkPath(String path) {
for (Entry entry : parts.getAttributes(elementIndex).entrySet()) {
String attribute = entry.getKey();
String attrValue = entry.getValue();
- checkAttribute(element, attribute, attrValue);
+ checkAttribute(fullFile, element, attribute, attrValue);
}
}
}
+ /**
+ * Checks every attribute of the element by delegating to checkAttribute; fullFile is passed
+ * through unchanged so that problems can be reported with the file they came from.
+ */
- public void checkElement(String element, Attributes atts) {
+ public void checkElement(String fullFile, String element, Attributes atts) {
int length = atts.getLength();
for (int i = 0; i < length; ++i) {
- checkAttribute(element, atts.getQName(i), atts.getValue(i));
+ checkAttribute(fullFile, element, atts.getQName(i), atts.getValue(i));
}
}
- private void checkAttribute(String element, String attribute, String attrValue) {
+ private void checkAttribute(
+ String fullFile, String element, String attribute, String attrValue) {
// skip cases we know we don't need to test
if (!needsTesting.containsEntry(element, attribute)) {
return;
@@ -296,16 +298,18 @@ private void checkAttribute(String element, String attribute, String attrValue)
// Set breakpoint here for debugging (referenced from
// http://cldr.unicode.org/development/testattributevalues)
dtdData.getValueStatus(element, attribute, attrValue);
+ testLog.warnln(
+ Joiner.on('\t').join("Invalid", fullFile, element, attribute, attrValue));
}
synchronized (valueStatusInfo) {
valueStatusInfo.put(valueStatus, element, attribute, attrValue, Boolean.TRUE);
}
}
- void show(boolean verbose, ImmutableSet retain) {
+ void show(DtdType dtdType, boolean verbose, ImmutableSet retain) {
if (dtdData.dtdType == DtdType.keyboard3
&& testLog.logKnownIssue("CLDR-14974", "skipping for keyboard")) {
- testLog.warnln("Skipping for keyboard3");
+ testLog.warnln("keyboard3 is missing validity checks");
}
boolean haveProblems = false;
for (ValueStatus valueStatus : ValueStatus.values()) {
@@ -323,7 +327,9 @@ void show(boolean verbose, ImmutableSet retain) {
}
StringBuilder out = new StringBuilder();
out.append(
- "\nIf the test fails, look at https://cldr.unicode.org/development/cldr-development-site/testattributevalues\n");
+ "For "
+ + dtdType.directories
+ + "\nIf the test fails, use -v for details. Also look at https://cldr.unicode.org/development/updating-codes/testattributevalues for guidance.\n");
out.append("file\tCount:\t" + dtdData.dtdType + "\t" + fileCount + "\n");
out.append("element\tCount:\t" + dtdData.dtdType + "\t" + elementCount + "\n");
diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestBasic.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestBasic.java
index c21c6c5689c..9a3f3773249 100644
--- a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestBasic.java
+++ b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestBasic.java
@@ -5,6 +5,8 @@
import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Multimap;
+import com.google.common.collect.Sets;
+import com.google.common.collect.Sets.SetView;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Row;
@@ -41,6 +43,7 @@
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
+import java.util.stream.Collectors;
import org.unicode.cldr.test.DisplayAndInputProcessor;
import org.unicode.cldr.tool.CldrVersion;
import org.unicode.cldr.tool.LikelySubtags;
@@ -195,7 +198,7 @@ private void checkDtds(
} else if (fileName.getPath().contains("/keyboards/3.0/")
&& logKnownIssue(
"CLDR-17574", "With v46, parsing issues for keyboard xml files")) {
- ; // do nothing, skip test
+ // do nothing, skip test
} else if (name.endsWith(".xml")) {
data.add(check(fileName));
if (deepCheck // takes too long to do all the time
@@ -1652,5 +1655,57 @@ public void sortPaths(Comparator dc, Collection paths) {
public void sortPaths(Comparator dc, String... array) {
Arrays.sort(array, 0, array.length, dc);
}
+
// public void TestNewDtdData() moved to TestDtdData
+
+ /**
+ * Checks that the BCP 47 key/value pairs found in archived releases are still present in the
+ * pairs from SUPPLEMENTAL_DATA_INFO. Does nothing when the archive directory is unavailable.
+ */
+ public void testBcp47Ids() {
+ if (!TestCLDRPaths.canUseArchiveDirectory()) {
+ return;
+ }
+ final File ARCHIVE = new File(CLDRPaths.ARCHIVE_DIRECTORY);
+ // pairs already reported for a later release, so each missing pair is reported only once
+ Set> seen = new LinkedHashSet<>();
+
+ // get the archive directories in reverse order (latest first)
+
+ TreeSet sortedArchiveDirectories = new TreeSet<>(Collections.reverseOrder());
+ sortedArchiveDirectories.addAll(Arrays.asList(ARCHIVE.listFiles()));
+
+ // get the BCP 47 keys to test against
+
+ Set> newKeys = pairs(SUPPLEMENTAL_DATA_INFO.getBcp47Keys());
+
+ for (File file : sortedArchiveDirectories) {
+ if (!file.getName().startsWith("cldr-")) {
+ continue;
+ }
+ // NOTE(review): this is a lexicographic String comparison, as is the reverse ordering
+ // above. A name such as "cldr-100.0" compares below "cldr-44.0" and "cldr-9.0" compares
+ // above it, so the early break is only right if no such directory names exist — confirm.
+ if (file.getName().compareTo("cldr-44.0") < 0) {
+ break;
+ }
+ logln(file.toString());
+ File supplementalDir = new File(file, "common/supplemental");
+ SupplementalDataInfo otherSupplementalData;
+ try {
+ otherSupplementalData = SupplementalDataInfo.getInstance(supplementalDir);
+ } catch (RuntimeException e) {
+ // report which archive failed, then abort the test by rethrowing
+ errln("Can't create SupplementalDataInfo for " + supplementalDir);
+ throw e;
+ // continue;
+ }
+ Set> oldKeys = pairs(otherSupplementalData.getBcp47Keys());
+ if (!newKeys.containsAll(oldKeys)) {
+ SetView> oldButNotNew = Sets.difference(oldKeys, newKeys);
+ SetView> oldButNotNewMinusSeen =
+ Sets.difference(oldButNotNew, seen);
+ // the assertion fails when there are unreported missing pairs; remember them then
+ if (!assertEquals(file.toString(), Collections.emptySet(), oldButNotNewMinusSeen)) {
+ seen.addAll(oldButNotNewMinusSeen);
+ }
+ }
+ }
+ }
+
+ /** Collects each key/value entry of bcp47Keys as a Pair into a TreeSet (sorted, no duplicates). */
+ private Set> pairs(Relation bcp47Keys) {
+ return bcp47Keys.entrySet().stream()
+ .map(x -> Pair.of(x.getKey(), x.getValue()))
+ .collect(Collectors.toCollection(TreeSet::new));
+ }
}
diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestSupplementalInfo.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestSupplementalInfo.java
index fcf753bbccb..87ab3b0780f 100644
--- a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestSupplementalInfo.java
+++ b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestSupplementalInfo.java
@@ -78,6 +78,7 @@
import org.unicode.cldr.util.PreferredAndAllowedHour.HourStyle;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.StandardCodes.CodeType;
+import org.unicode.cldr.util.StandardCodes.LstrField;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData;
@@ -2176,4 +2177,48 @@ public void TestGrammarInfo() {
}
}
}
+
+    /**
+     * Checks that every ISO 639 macrolanguage, apart from the skipped "no" and "sh", has at
+     * least one encompassed language whose language-alias replacement list contains the
+     * macrolanguage. A macrolanguage with no such encompassed language is reported as an error.
+     */
+    public void testPredominantEncompassed() {
+        // maybe check with lstreg instead? They should be in sync.
+        Map>> lstreg = StandardCodes.getEnumLstreg();
+
+        SupplementalDataInfo supp = SupplementalDataInfo.getInstance();
+        // Returns type -> tag -> , like "language" -> "sh" -> <{"sr_Latn"}, reason>
+        Map, String>>> locAliases = supp.getLocaleAliasInfo();
+        Map, String>> langAliases = locAliases.get("language");
+        Set skip = Set.of("no", "sh");
+
+        Iso639Data.getNames("a"); // init (need to fix)
+
+        Set macros = Iso639Data.getMacros();
+        main:
+        for (String macro : macros) {
+            if (skip.contains(macro)) {
+                continue;
+            }
+            Set encompasseds = Iso639Data.getEncompassedForMacro(macro);
+            final List encompassedNames =
+                    encompasseds.stream().map(x -> codeAndName(x)).collect(Collectors.toList());
+            for (String encompassed : encompasseds) {
+                R2, String> data = langAliases.get(encompassed);
+                if (data != null) {
+                    if (data.get0().contains(macro)) {
+                        // leading space keeps the message from running into the "(code)" text
+                        logln(
+                                codeAndName(macro)
+                                        + " has predominant "
+                                        + codeAndName(encompassed)
+                                        + " in encompassed: "
+                                        + encompassedNames);
+                        // one predominant language is enough; move on to the next macrolanguage
+                        continue main;
+                    }
+                }
+            }
+            errln("ERROR " + codeAndName(macro) + " missing predominant from " + encompassedNames);
+        }
+    }
+
+    /** Formats a language code for test messages as the English name followed by " (code)". */
+    private String codeAndName(String code) {
+        final String englishName = CLDRConfig.getInstance().getEnglish().getName(code);
+        return englishName + " (" + code + ")";
+    }
}