Skip to content

Commit

Permalink
CLDR-17600 Fix test issues
Browse files Browse the repository at this point in the history
  • Loading branch information
macchiati committed May 1, 2024
1 parent 24287f5 commit 2422ec6
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 44 deletions.
58 changes: 30 additions & 28 deletions common/supplemental/attributeValueValidity.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
<metadata>
<validity>
<!-- BCP 47 contains many more language codes than we are interested in maintaining as a part of the CLDR -->
<!-- This $language list contains ONLY those languages that are in the CLDR locales' IDs (after maximizing with LikelySubtags) -->
<!-- This $language list contains ONLY those languages that are in TC locales' IDs (after maximizing with LikelySubtags) -->
<variable id='$language' type='choice'>
af am ar as az
be bg bgc bho bn brx bs
Expand Down Expand Up @@ -35,8 +35,8 @@
yo yue
zh zu
</variable>
<!-- The following are non-TC locales that have reached Basic, plus specials mul root zxx und -->
<variable id='$languageExceptions' type='choice'>
<!-- The following are non-TC locales that have reached at least Basic, plus specials mul root zxx und -->
<variable id='$languageNonTcGeqBasic' type='choice'>
ast
blo br
chr csw
Expand All @@ -62,31 +62,33 @@
za
mul root zxx und
</variable>
<!-- The following are neither CLDR locales nor specials -->
<variable id='$oldLanguages' type='choice'>
aa ace ada ady ain ale alt anp arp ars atj av awa ay
ban bi bin bla bug
cay ch chk chm chp chy clc crg crj crk crl crm crr crs
dak dar dgr dzg
efi eka
fj fon frc
gan gil gor gwi
hai hak hax hil hmn hsn ht hup hur hz
iba ibb ikt ilo inh
kac kbd kfo kha kj kmb kr krc krl kru kum kv kwk
lad lez li lil lld lou loz lsm ltg lua lun lus
mad mag mak men mh min moe mos mwl
na nan nap nb new ng nia niu nog
ojb ojc ojs ojw oka
pag pam pau pqm
quc
rap rar rup
sad sba sco slh sm sma smj sms snk srn str suk swb sy
tce tem tet tgx tht tlh tli ttm tum tvl ty
udm umb
war wuu
xal
ybb
<!-- The following have not yet made it to Basic -->
<variable id='$languageNonTcLtBasic' type='choice'>
aa ab agq ak an ann apc arn asa
ba bal bas bem bew bez bgn blt bm bo bss byn
cad cch ccp ce cgg cho cic ckb co cu
dav dje dua dv dyo dz
ebu ee ewo
ff frr fur
gaa gez gn gsw guz gv
haw hnj
ii io iu
jbo jgo jmc
kab kaj kam kcg kde ken khq ki kkj kl kln kpe ksb ksf ksh kw
la lag lg lkt lld ln lrc ltg lu luo luy
mas mdf mer mfe mg mgh mgo mic moh mua mus myv mzn
naq nb nd nmg nnh nr nso nus nv ny nyn om
os osa
pap pis
quc
rhg rif rn rof rw rwk
saq sbp scn sdh se seh ses sg shi shn sid skr sma smj smn sms sn ss ssy st
teo tig tn tok tpi trv trw ts twq tyv tzm
vai ve vo vun
wa wae wal wbp
xog
yav yi
zgh
</variable>
<variable id='$scriptNonUnicode' type='choice'>Afak Aran Blis Cirt Cyrs Egyd Egyh Geok Inds Jurc Kitl Kpel Latf Latg Loma Maya Moon
Nkgb Phlv Roro Sara Syre Syrj Syrn Teng Visp Wole
Expand Down
3 changes: 1 addition & 2 deletions common/supplemental/coverageLevels.xml
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,7 @@ For terms of use, see http://www.unicode.org/copyright.html
<coverageVariable key="%language60_TD" value="(shu|dzg|kbl|mde|mua|sba)"/>
<!-- See CLDR-16673: All basic+ locales (per coverageLevels.txt) MUST have their language's name at least at level 80 (modern), except for those on exception list -->
<!-- Can use MinimizeRegex.java to "unpack" the compressed lists that are painful to edit -->
<coverageVariable key="%language80" value="(ace|ada|ady|ain|ale|alt|anp|arn|arp|ars|ast|atj|awa|ban|bho|bin|bla|brx|bug|byn|cay|ceb|chk|chm|cho|chp|chr|chy|clc|crg|crj|crk|crl|crm|crr|csw|dak|dar|dgr|doi|dsb|dzg|efi|eka|fil|fon|frc|gaa|gez|gil|gor|gwi|hai|hax|hil|hmn|hsb|hup|hur|iba|ibb|ikt|ilo|inh|jbo|kac|kaj|kbd|kcg|kea|kfo|kgp|kha|kmb|kok|kpe|krc|krl|kru|kum|kwk|lad|lez|lil|lou|loz|lsm|lua|lun|lus|mad|mag|mai|mak|mdf|men|mic|min|mni|moe|moh|mos|mul|mus|mwl|myv|nap|new|nia|niu|nog|nqo|nso|ojb|ojc|ojs|ojw|oka|pag|pam|pap|pau|pcm|pqm|rap|rar|rhg|rup|sad|sah|sat|sba|scn|sco|shn|slh|snk|srn|str|suk|swb|syr|tce|tem|tet|tgx|tht|tig|tlh|tli|tpi|trv|ttm|tum|tvl|tyv|udm|umb|wal|war|wuu|xal|ybb|yrl|yue|zun|zxx|zza|ab|af|am|an|ar|as|av|ay|az|ba|be|bg|bi|bn|br|bs|ca|ch|co|cs|cv|cy|da|de|dv|el|en|es|et|eu|fa|fi|fj|fo|fr|fy|ga|gd|gl|gn|gu|ha|he|hi|hr|ht|hu|hy|hz|ia|id|ig|io|is|it|iu|ja|jv|ka|kj|kk|km|kn|ko|kr|ks|ku|kv|ky|la|lb|li|lo|lt|lv|mh|mi|mk|ml|mn|mr|ms|mt|my|na|ne|ng|nl|nn|no|nr|nv|ny|oc|or|pa|pl|ps|pt|qu|rm|ro|ru|sa|sc|sd|si|sk|sl|sm|so|sq|sr|ss|st|su|sv|sw|ta|te|tg|th|ti|tk|tn|to|tr|ts|tt|ty|ug|uk|ur|uz|ve|vi|wa|wo|xh|yo|zh|zu)"/>
<coverageVariable key="%languagecomp" value="(gan|hak|hsn|nan)"/> <!-- not currently used, just for reference: the only valid language codes that are not in modern coverage -->
<coverageVariable key="%language80" value="(af|am|ar|as|az|be|bg|bgc|bho|bn|brx|bs|ca|ceb|cs|cv|cy|da|de|doi|el|en|es|et|eu|fa|fi|fil|fr|ga|gd|gl|gu|ha|he|hi|hr|hu|hy|id|ig|is|it|ja|jv|ka|kk|km|kn|ko|kok|ks|ky|lo|lt|lv|mai|mi|mk|ml|mn|mni|mr|ms|my|ne|nl|nn|no|or|pa|pcm|pl|ps|pt|raj|ro|ru|sa|sat|sd|si|sk|sl|so|sq|sr|su|sv|sw|ta|te|tg|th|ti|tk|tr|tt|uk|ur|uz|vi|wo|xh|yo|yue|zh|zu|ast|blo|br|chr|csw|dsb|eo|ff|fo|fy|hsb|ia|ie|kea|kgp|ku|kxv|lb|lij|lmo|mt|nds|nqo|oc|prg|qu|rm|sah|sc|syr|szl|to|ug|vec|vmw|xnr|yrl|za|mul|root|zxx|und)"/>
<coverageVariable key="%lbTypes80" value="(strict|normal|loose)"/>
<coverageVariable key="%lwTypes" value="(normal|breakall|keepall|phrase)"/>
<coverageVariable key="%m0Types80" value="(bgn|prprname|ungegn)"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1348,6 +1348,8 @@ private void makeStuffSafe() {
}
}
CLDRScriptCodes = newScripts.build();
CLDRLanguageCodes = CldrUtility.protectCollection(CLDRLanguageCodes);
languageNonTcLtBasic = CldrUtility.protectCollection(languageNonTcLtBasic);
}

/**
Expand Down Expand Up @@ -2144,13 +2146,21 @@ private boolean handleMetadata(String level2, String value, XPathValue parts) {
String level3 = parts.getElement(3);
if (level3.equals("variable")) {
Map<String, String> attributes = parts.getAttributes(-1);
validityInfo.put(attributes.get("id"), Row.of(attributes.get("type"), value));
String idString = attributes.get("id");
if (("$language".equals(idString)
|| "$languageExceptions".equals(attributes.get("id")))
&& "choice".equals(attributes.get("type"))) {
String[] validCodeArray = value.trim().split("\\s+");
CLDRLanguageCodes.addAll(Arrays.asList(validCodeArray));
final String idString = attributes.get("id");
final String typeString = attributes.get("type");
validityInfo.put(idString, Row.of(typeString, value));
if ("choice".equals(typeString)) {
if ("$language".equals(idString)
|| "$languageNonTcGeqBasic".equals(idString)) {
String[] validCodeArray = value.trim().split("\\s+");
CLDRLanguageCodes.addAll(Arrays.asList(validCodeArray));
}
if ("$languageNonTcLtBasic".equals(idString)) { // not yet basic
String[] validCodeArray = value.trim().split("\\s+");
final List<String> asList = Arrays.asList(validCodeArray);
languageNonTcLtBasic.addAll(asList);
CLDRLanguageCodes.addAll(asList);
}
}
return true;
} else if (level3.equals("attributeValues")) {
Expand Down Expand Up @@ -2495,6 +2505,8 @@ public int parseIntegerOrNull(String attributeValue) {
public Map<CLDRLocale, CLDRLocale> baseToDefaultContent; // wo -> wo_Arab_SN
public Map<CLDRLocale, CLDRLocale> defaultContentToBase; // wo_Arab_SN -> wo
private Set<String> CLDRLanguageCodes = new TreeSet<>();
private Set<String> languageNonTcLtBasic = new TreeSet<>();

private Set<String> CLDRScriptCodes;

/**
Expand Down Expand Up @@ -4793,10 +4805,16 @@ public Map<String, R2<String, String>> getValidityInfo() {
return validityInfo;
}

/**
 * Returns the language codes gathered from the validity metadata "choice" variables {@code
 * $language} (TC languages), {@code $languageNonTcGeqBasic} (non-TC languages at Basic or
 * better, plus the specials), and {@code $languageNonTcLtBasic} (non-TC languages not yet at
 * Basic).
 *
 * <p>The internal set itself is returned, not a copy.
 *
 * @return the set of CLDR language codes
 */
public Set<String> getCLDRLanguageCodes() {
return CLDRLanguageCodes;
}

/**
 * Returns the non-TC language codes that have not yet reached Basic coverage, as listed in the
 * {@code $languageNonTcLtBasic} validity variable. These codes are also included in {@link
 * #getCLDRLanguageCodes()}.
 *
 * <p>The internal set itself is returned, not a copy.
 *
 * @return the set of non-TC language codes below Basic
 */
public Set<String> getLanguageNonTcLtBasic() {
return languageNonTcLtBasic;
}

/**
 * Tests whether a code is one of the CLDR language codes.
 *
 * @param code the language code to look up
 * @return true if {@code code} is contained in the CLDR language code set
 */
public boolean isCLDRLanguageCode(String code) {
return CLDRLanguageCodes.contains(code);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1173,6 +1173,7 @@ public void testLSR() {

// get CLDR locale IDs' codes

// the maps are from codes (like en) to the best level in the CLDR Organization.
Map<String, Level> langs = new TreeMap<>();
Map<String, Level> scripts = new TreeMap<>();
Map<String, Level> regions = new TreeMap<>();
Expand All @@ -1197,6 +1198,15 @@ public void testLSR() {

Map<String, CoverageStatus> data = new TreeMap<>();

// This is a map from integers (representing language, script or region; should rewrite to
// use enums)
// to a row of data:
// name,
// map code => best cldr org level,
// codes in root
// expected coverage level
// should change the row of data into a class; would be much easier to understand

ImmutableMap<Integer, R4<String, Map<String, Level>, Set<String>, Level>> typeToInfo =
ImmutableMap.of(
CLDRFile.LANGUAGE_NAME,
Expand All @@ -1210,15 +1220,24 @@ public void testLSR() {
typeToInfo.entrySet()) {
int type = typeAndInfo.getKey();
String name = typeAndInfo.getValue().get0();
Map<String, Level> idPartMap = typeAndInfo.getValue().get1();
Set<String> setRoot = typeAndInfo.getValue().get2();
Level targetLevel = typeAndInfo.getValue().get3();
Map<String, Level> idPartMap =
typeAndInfo.getValue().get1(); // map from code to best cldr level
Set<String> setRoot = typeAndInfo.getValue().get2(); // set of codes in root
Level targetLevel =
typeAndInfo.getValue().get3(); // it looks like the targetLevel is ignored

for (String code : Sets.union(idPartMap.keySet(), setRoot)) {
String displayName = testInfo.getEnglish().getName(type, code);
String path = CLDRFile.getKey(type, code);
Level level = coverageLevel.getLevel(path);
data.put(
name + "\t" + code,

// Level level;
// boolean inRoot;
// boolean inId;
// Level languageLevel; best in cldr org
// String displayName;
new CoverageStatus(
level,
setRoot.contains(code),
Expand Down Expand Up @@ -1263,6 +1282,7 @@ public void testLSR() {
}
}

// just check languages
Set<String> ids = new TreeSet<>();
Set<String> missing = new TreeSet<>();
for (Entry<String, CoverageStatus> entry : data.entrySet()) {
Expand All @@ -1272,15 +1292,15 @@ public void testLSR() {
}
final CoverageStatus value = entry.getValue();
if (value.inId) {
String[] parts = key.split("\t");
String[] parts = key.split("\t"); // split into language and code
ids.add(parts[1]);
if (!value.inRoot) {
missing.add(parts[1]);
}
}
}
if (!assertEquals(
"Language subtags that are in a CLDR locale's ID are in root ("
"Language subtags in a locale's ID must be in one of the attributeValueValidity.xml $language* sets, typically $languageNonTcLtBasic ("
+ missing.size()
+ ")",
"",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -343,8 +343,8 @@ private void showCldrFile(final CLDRFile cldrFile) {
Exception[] internalException = new Exception[1];
for (String path : cldrFile) {
String value = cldrFile.getStringValue(path);
if (value.equals("[\\- , . % ‰ + 0-9]")) {
int debug = 0;
if (value == null) {
continue; // values may be null, from extraPaths
}
String display = daip.processForDisplay(path, value);
internalException[0] = null;
Expand Down

0 comments on commit 2422ec6

Please sign in to comment.