Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLDR-17535 Ensure testData is good for likelySubtags #3977

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions common/supplemental/likelySubtags.xml
Original file line number Diff line number Diff line change
Expand Up @@ -837,7 +837,6 @@ not be patched by hand, as any changes made in that fashion may be lost.
<likelySubtag from="zza" to="zza_Latn_TR"/> <!--Zaza‧?‧? ➡ Zaza‧Latin‧Türkiye-->
<!-- Data to find likely language; some implementations may omit -->
<likelySubtag from="und" to="en_Latn_US"/> <!--?‧?‧? ➡ English‧Latin‧United States-->
<likelySubtag from="und_001" to="en_Latn_US"/> <!--?‧?‧world ➡ English‧Latin‧United States-->
<likelySubtag from="und_419" to="es_Latn_419"/> <!--?‧?‧Latin America ➡ Spanish‧Latin‧Latin America-->
<likelySubtag from="und_AD" to="ca_Latn_AD"/> <!--?‧?‧Andorra ➡ Catalan‧Latin‧Andorra-->
<likelySubtag from="und_AE" to="ar_Arab_AE"/> <!--?‧?‧United Arab Emirates ➡ Arabic‧Arabic‧United Arab Emirates-->
Expand Down Expand Up @@ -1162,7 +1161,6 @@ not be patched by hand, as any changes made in that fashion may be lost.
<likelySubtag from="und_Kthi" to="bho_Kthi_IN"/> <!--?‧Kaithi‧? ➡ Bhojpuri‧Kaithi‧India-->
<likelySubtag from="und_Lana" to="nod_Lana_TH"/> <!--?‧Lanna‧? ➡ Northern Thai‧Lanna‧Thailand-->
<likelySubtag from="und_Laoo" to="lo_Laoo_LA"/> <!--?‧Lao‧? ➡ Lao‧Lao‧Laos-->
<likelySubtag from="und_Latn_001" to="en_Latn_US"/> <!--?‧Latin‧world ➡ English‧Latin‧United States-->
<likelySubtag from="und_Latn_AE" to="en_Latn_AE"/> <!--?‧Latin‧United Arab Emirates ➡ English‧Latin‧United Arab Emirates-->
<likelySubtag from="und_Latn_AF" to="tk_Latn_AF"/> <!--?‧Latin‧Afghanistan ➡ Turkmen‧Latin‧Afghanistan-->
<likelySubtag from="und_Latn_AM" to="ku_Latn_AM"/> <!--?‧Latin‧Armenia ➡ Kurdish‧Latin‧Armenia-->
Expand Down
75 changes: 68 additions & 7 deletions common/testData/localeIdentifiers/likelySubtags.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ af-Egyp ; af-Egyp-ZA ; af-Egyp ;
af-Latn ; af-Latn-ZA ; af ;
af-NA ; af-Latn-NA ; af-NA ;
af-ZA ; af-Latn-ZA ; af ;
ak ; ak-Latn-GH ; ak ;
ak-AQ ; ak-Latn-AQ ; ak-AQ ;
ak-Egyp ; ak-Egyp-GH ; ak-Egyp ;
ak-GH ; ak-Latn-GH ; ak ;
ak-Latn ; ak-Latn-GH ; ak ;
am ; am-Ethi-ET ; am ;
am-AQ ; am-Ethi-AQ ; am-AQ ;
am-ET ; am-Ethi-ET ; am ;
Expand Down Expand Up @@ -77,6 +82,11 @@ az-AQ ; az-Latn-AQ ; az-AQ ;
az-AZ ; az-Latn-AZ ; az ;
az-Egyp ; az-Egyp-AZ ; az-Egyp ;
az-Latn ; az-Latn-AZ ; az ;
bal ; bal-Arab-PK ; bal ;
bal-AQ ; bal-Arab-AQ ; bal-AQ ;
bal-Egyp ; bal-Egyp-PK ; bal-Egyp ;
bal-Latn ; bal-Latn-PK ; bal-Latn ;
bal-PK ; bal-Arab-PK ; bal ;
be ; be-Cyrl-BY ; be ;
be-AQ ; be-Cyrl-AQ ; be-AQ ;
be-BY ; be-Cyrl-BY ; be ;
Expand Down Expand Up @@ -189,6 +199,12 @@ dsb-AQ ; dsb-Latn-AQ ; dsb-AQ ;
dsb-DE ; dsb-Latn-DE ; dsb ;
dsb-Egyp ; dsb-Egyp-DE ; dsb-Egyp ;
dsb-Latn ; dsb-Latn-DE ; dsb ;
ee ; ee-Latn-GH ; ee ;
ee-AQ ; ee-Latn-AQ ; ee-AQ ;
ee-Egyp ; ee-Egyp-GH ; ee-Egyp ;
ee-GH ; ee-Latn-GH ; ee ;
ee-Latn ; ee-Latn-GH ; ee ;
ee-TG ; ee-Latn-TG ; ee-TG ;
el ; el-Grek-GR ; el ;
el-AQ ; el-Grek-AQ ; el-AQ ;
el-CY ; el-Grek-CY ; el-CY ;
Expand Down Expand Up @@ -447,6 +463,11 @@ ga-Egyp ; ga-Egyp-IE ; ga-Egyp ;
ga-GB ; ga-Latn-GB ; ga-GB ;
ga-IE ; ga-Latn-IE ; ga ;
ga-Latn ; ga-Latn-IE ; ga ;
gaa ; gaa-Latn-GH ; gaa ;
gaa-AQ ; gaa-Latn-AQ ; gaa-AQ ;
gaa-Egyp ; gaa-Egyp-GH ; gaa-Egyp ;
gaa-GH ; gaa-Latn-GH ; gaa ;
gaa-Latn ; gaa-Latn-GH ; gaa ;
gd ; gd-Latn-GB ; gd ;
gd-AQ ; gd-Latn-AQ ; gd-AQ ;
gd-Egyp ; gd-Egyp-GB ; gd-Egyp ;
Expand Down Expand Up @@ -516,6 +537,11 @@ ig-AQ ; ig-Latn-AQ ; ig-AQ ;
ig-Egyp ; ig-Egyp-NG ; ig-Egyp ;
ig-Latn ; ig-Latn-NG ; ig ;
ig-NG ; ig-Latn-NG ; ig ;
ii ; ii-Yiii-CN ; ii ;
ii-AQ ; ii-Yiii-AQ ; ii-AQ ;
ii-CN ; ii-Yiii-CN ; ii ;
ii-Egyp ; ii-Egyp-CN ; ii-Egyp ;
ii-Yiii ; ii-Yiii-CN ; ii ;
is ; is-Latn-IS ; is ;
is-AQ ; is-Latn-AQ ; is-AQ ;
is-Egyp ; is-Egyp-IS ; is-Egyp ;
Expand Down Expand Up @@ -581,6 +607,7 @@ kok-AQ ; kok-Deva-AQ ; kok-AQ ;
kok-Deva ; kok-Deva-IN ; kok ;
kok-Egyp ; kok-Egyp-IN ; kok-Egyp ;
kok-IN ; kok-Deva-IN ; kok ;
kok-Latn ; kok-Latn-IN ; kok-Latn ;
ks ; ks-Arab-IN ; ks ;
ks-AQ ; ks-Arab-AQ ; ks-AQ ;
ks-Arab ; ks-Arab-IN ; ks ;
Expand Down Expand Up @@ -732,12 +759,23 @@ nqo-AQ ; nqo-Nkoo-AQ ; nqo-AQ ;
nqo-Egyp ; nqo-Egyp-GN ; nqo-Egyp ;
nqo-GN ; nqo-Nkoo-GN ; nqo ;
nqo-Nkoo ; nqo-Nkoo-GN ; nqo ;
nso ; nso-Latn-ZA ; nso ;
nso-AQ ; nso-Latn-AQ ; nso-AQ ;
nso-Egyp ; nso-Egyp-ZA ; nso-Egyp ;
nso-Latn ; nso-Latn-ZA ; nso ;
nso-ZA ; nso-Latn-ZA ; nso ;
oc ; oc-Latn-FR ; oc ;
oc-AQ ; oc-Latn-AQ ; oc-AQ ;
oc-ES ; oc-Latn-ES ; oc-ES ;
oc-Egyp ; oc-Egyp-FR ; oc-Egyp ;
oc-FR ; oc-Latn-FR ; oc ;
oc-Latn ; oc-Latn-FR ; oc ;
om ; om-Latn-ET ; om ;
om-AQ ; om-Latn-AQ ; om-AQ ;
om-ET ; om-Latn-ET ; om ;
om-Egyp ; om-Egyp-ET ; om-Egyp ;
om-KE ; om-Latn-KE ; om-KE ;
om-Latn ; om-Latn-ET ; om ;
or ; or-Orya-IN ; or ;
or-AQ ; or-Orya-AQ ; or-AQ ;
or-Egyp ; or-Egyp-IN ; or-Egyp ;
Expand Down Expand Up @@ -822,6 +860,11 @@ ru-KZ ; ru-Cyrl-KZ ; ru-KZ ;
ru-MD ; ru-Cyrl-MD ; ru-MD ;
ru-RU ; ru-Cyrl-RU ; ru ;
ru-UA ; ru-Cyrl-UA ; ru-UA ;
rw ; rw-Latn-RW ; rw ;
rw-AQ ; rw-Latn-AQ ; rw-AQ ;
rw-Egyp ; rw-Egyp-RW ; rw-Egyp ;
rw-Latn ; rw-Latn-RW ; rw ;
rw-RW ; rw-Latn-RW ; rw ;
sa ; sa-Deva-IN ; sa ;
sa-AQ ; sa-Deva-AQ ; sa-AQ ;
sa-Deva ; sa-Deva-IN ; sa ;
Expand Down Expand Up @@ -888,6 +931,12 @@ sr-Latn ; sr-Latn-RS ; sr-Latn ;
sr-ME ; sr-Latn-ME ; sr-ME ;
sr-RS ; sr-Cyrl-RS ; sr ;
sr-XK ; sr-Cyrl-XK ; sr-XK ;
st ; st-Latn-ZA ; st ;
st-AQ ; st-Latn-AQ ; st-AQ ;
st-Egyp ; st-Egyp-ZA ; st-Egyp ;
st-LS ; st-Latn-LS ; st-LS ;
st-Latn ; st-Latn-ZA ; st ;
st-ZA ; st-Latn-ZA ; st ;
su ; su-Latn-ID ; su ;
su-AQ ; su-Latn-AQ ; su-AQ ;
su-Egyp ; su-Egyp-ID ; su-Egyp ;
Expand Down Expand Up @@ -953,6 +1002,12 @@ tk-AQ ; tk-Latn-AQ ; tk-AQ ;
tk-Egyp ; tk-Egyp-TM ; tk-Egyp ;
tk-Latn ; tk-Latn-TM ; tk ;
tk-TM ; tk-Latn-TM ; tk ;
tn ; tn-Latn-ZA ; tn ;
tn-AQ ; tn-Latn-AQ ; tn-AQ ;
tn-BW ; tn-Latn-BW ; tn-BW ;
tn-Egyp ; tn-Egyp-ZA ; tn-Egyp ;
tn-Latn ; tn-Latn-ZA ; tn ;
tn-ZA ; tn-Latn-ZA ; tn ;
to ; to-Latn-TO ; to ;
to-AQ ; to-Latn-AQ ; to-AQ ;
to-Egyp ; to-Egyp-TO ; to-Egyp ;
Expand Down Expand Up @@ -1107,14 +1162,14 @@ und-Cyrl-BY ; be-Cyrl-BY ; be ;
und-Cyrl-KG ; ky-Cyrl-KG ; ky ;
und-Cyrl-KZ ; ru-Cyrl-KZ ; ru-KZ ;
und-Cyrl-MD ; uk-Cyrl-MD ; uk-MD ;
und-Cyrl-ME ; ru-Cyrl-ME ; ru-ME ;
und-Cyrl-ME ; sr-Cyrl-ME ; ;
und-Cyrl-MK ; mk-Cyrl-MK ; mk ;
und-Cyrl-MN ; mn-Cyrl-MN ; mn ;
und-Cyrl-RS ; sr-Cyrl-RS ; sr ;
und-Cyrl-RU ; ru-Cyrl-RU ; ru ;
und-Cyrl-TJ ; tg-Cyrl-TJ ; tg ;
und-Cyrl-UA ; uk-Cyrl-UA ; uk ;
und-Cyrl-UZ ; ru-Cyrl-UZ ; ru-UZ ;
und-Cyrl-UZ ; uz-Cyrl-UZ ; uz-Cyrl ;
und-Cyrl-XK ; sr-Cyrl-XK ; sr-XK ;
und-DE ; de-Latn-DE ; de ;
und-DG ; en-Latn-DG ; en-DG ;
Expand Down Expand Up @@ -1188,13 +1243,15 @@ und-Hans-AQ ; zh-Hans-AQ ; zh-AQ ;
und-Hans-CN ; zh-Hans-CN ; zh ;
und-Hans-HK ; zh-Hans-HK ; ;
und-Hans-MO ; zh-Hans-MO ; ;
und-Hans-MY ; zh-Hans-MY ; zh-MY ;
und-Hans-SG ; zh-Hans-SG ; zh-SG ;
und-Hans-TW ; zh-Hans-TW ; ;
und-Hant ; zh-Hant-TW ; zh-Hant ; zh-TW
und-Hant-AQ ; zh-Hant-AQ ; ;
und-Hant-CN ; zh-Hant-CN ; ;
und-Hant-CN ; yue-Hant-CN ; ;
und-Hant-HK ; zh-Hant-HK ; zh-HK ;
und-Hant-MO ; zh-Hant-MO ; zh-MO ;
und-Hant-MY ; zh-Hant-MY ; ;
und-Hant-SG ; zh-Hant-SG ; ;
und-Hant-TW ; zh-Hant-TW ; zh-Hant ; zh-TW
und-Hebr ; he-Hebr-IL ; he ;
Expand Down Expand Up @@ -1384,7 +1441,7 @@ und-Latn-MQ ; fr-Latn-MQ ; fr-MQ ;
und-Latn-MR ; fr-Latn-MR ; fr-MR ;
und-Latn-MS ; en-Latn-MS ; en-MS ;
und-Latn-MT ; mt-Latn-MT ; mt ;
und-Latn-MU ; mfe-Latn-MU ; mfe ;
und-Latn-MU ; en-Latn-MU ; en-MU ;
und-Latn-MV ; en-Latn-MV ; en-MV ;
und-Latn-MW ; en-Latn-MW ; en-MW ;
und-Latn-MX ; es-Latn-MX ; es-MX ;
Expand Down Expand Up @@ -1427,7 +1484,7 @@ und-Latn-SH ; en-Latn-SH ; en-SH ;
und-Latn-SI ; sl-Latn-SI ; sl ;
und-Latn-SJ ; nb-Latn-SJ ; nb-SJ ;
und-Latn-SK ; sk-Latn-SK ; sk ;
und-Latn-SL ; kri-Latn-SL ; kri ;
und-Latn-SL ; en-Latn-SL ; en-SL ;
und-Latn-SM ; it-Latn-SM ; it-SM ;
und-Latn-SN ; fr-Latn-SN ; fr-SN ;
und-Latn-SO ; so-Latn-SO ; so ;
Expand All @@ -1441,7 +1498,7 @@ und-Latn-SZ ; en-Latn-SZ ; en-SZ ;
und-Latn-TC ; en-Latn-TC ; en-TC ;
und-Latn-TD ; fr-Latn-TD ; fr-TD ;
und-Latn-TG ; fr-Latn-TG ; fr-TG ;
und-Latn-TK ; tkl-Latn-TK ; tkl ;
und-Latn-TK ; en-Latn-TK ; en-TK ;
und-Latn-TL ; pt-Latn-TL ; pt-TL ;
und-Latn-TM ; tk-Latn-TM ; tk ;
und-Latn-TN ; fr-Latn-TN ; fr-TN ;
Expand All @@ -1467,7 +1524,7 @@ und-Latn-WS ; sm-Latn-WS ; sm ;
und-Latn-XK ; sq-Latn-XK ; sq-XK ;
und-Latn-YT ; fr-Latn-YT ; fr-YT ;
und-Latn-ZA ; en-Latn-ZA ; en-ZA ;
und-Latn-ZM ; bem-Latn-ZM ; bem ;
und-Latn-ZM ; en-Latn-ZM ; en-ZM ;
und-Latn-ZW ; sn-Latn-ZW ; sn ;
und-MA ; ar-Arab-MA ; ar-MA ;
und-MC ; fr-Latn-MC ; fr-MC ;
Expand Down Expand Up @@ -1613,6 +1670,9 @@ und-WS ; sm-Latn-WS ; sm ;
und-XK ; sq-Latn-XK ; sq-XK ;
und-YE ; ar-Arab-YE ; ar-YE ;
und-YT ; fr-Latn-YT ; fr-YT ;
und-Yiii ; ii-Yiii-CN ; ii ;
und-Yiii-AQ ; ii-Yiii-AQ ; ii-AQ ;
und-Yiii-CN ; ii-Yiii-CN ; ii ;
und-ZA ; en-Latn-ZA ; en-ZA ;
und-ZM ; bem-Latn-ZM ; bem ;
und-ZW ; sn-Latn-ZW ; sn ;
Expand Down Expand Up @@ -1691,6 +1751,7 @@ zh-HK ; zh-Hant-HK ; zh-HK ;
zh-Hans ; zh-Hans-CN ; zh ;
zh-Hant ; zh-Hant-TW ; zh-Hant ; zh-TW
zh-MO ; zh-Hant-MO ; zh-MO ;
zh-MY ; zh-Hans-MY ; zh-MY ;
zh-SG ; zh-Hans-SG ; zh-SG ;
zh-TW ; zh-Hant-TW ; zh-Hant ; zh-TW
zu ; zu-Latn-ZA ; zu ;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.unicode.cldr.util.LocaleNames;
import org.unicode.cldr.util.SupplementalDataInfo;

@Deprecated
public class GenerateLikelySubtagTests {
private static final String SEPARATOR = CldrUtility.LINE_SEPARATOR;
private static final OutputStyle OUTPUT_STYLE = OutputStyle.XML;
Expand All @@ -27,7 +28,7 @@ public class GenerateLikelySubtagTests {
public static void main(String[] args) throws IOException {
if (true) {
throw new IllegalArgumentException(
"This tool should not be used in its current state.");
"Deprecated — it appears that we don't need this, but keeping until we are sure.");
}
out =
FileUtilities.openUTF8Writer(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -421,9 +421,9 @@ public static void main(String[] args) throws IOException {
{"und_Kana", "ja_Kana_JP"},
{"und_Kana_JP", "ja_Kana_JP"},
{"und_Latn", "en_Latn_US"},
{"und_001", "en_Latn_US"}, // to not be overridden by tok_Latn_001
{"und_001", "en_Latn_001"}, // to not be overridden by tok_Latn_001
{
"und_Latn_001", "en_Latn_US"
"und_Latn_001", "en_Latn_001"
}, // to not be overridden by tok_Latn_001
{"und_Latn_ET", "en_Latn_ET"},
{"und_Latn_NE", "ha_Latn_NE"},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,6 @@ public static void main(String[] args) {
Set<String> testCases = getTestCases(data);

for (String testRaw : testCases) {
if (testRaw.startsWith("qaa")) {
int debug = 0;
}
final CLDRLocale source = CLDRLocale.getInstance(testRaw);
final String test = source.toLanguageTag();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -555,7 +555,7 @@
}

if (!collectedBad.isEmpty()) {
warnln(

Check warning on line 558 in tools/cldr-code/src/test/java/org/unicode/cldr/unittest/LikelySubtagsTest.java

View workflow job for this annotation

GitHub Actions / build

(LikelySubtagsTest.java:558) Warning: Locales have 214 unexpected characters in main and/or aux: [҂״܀-܍०-९৲-৺੦-੯૰౦-౯໐-໙၀-၉၏႐-႙႞႟፠-፼᎐-᎙᠐-᠙꒐-꓆𞅏{a\:}{ch’}{e\:}{i\:}{k’}{o\:}{ts’}{tł’}{t’}{à\:}{á\:}{è\:}{é\:}{ì\:}{í\:}{ò\:}{ó\:}{ଅ\:}{ଆ\:}{ଏ\:}]
"Locales have "
+ collectedBad.size()
+ " unexpected characters in main and/or aux:\t"
Expand Down Expand Up @@ -890,4 +890,24 @@
}
}
}

/**
 * Checks each key/value pair in {@code likely} for conflicts between the source locale
 * (the key) and the target locale (the value).
 *
 * <p>Any field the source states explicitly must be carried over unchanged into the
 * target: the language (unless it is {@code und}), the script (unless empty) and the
 * region (unless empty). Fields the source leaves unspecified are not checked.
 */
public void testConflicts() {
    for (Entry<String, String> mapping : likely.entrySet()) {
        final String from = mapping.getKey();
        final String to = mapping.getValue();

        final CLDRLocale source = CLDRLocale.getInstance(from);
        final CLDRLocale target = CLDRLocale.getInstance(to);

        // Shared suffix for the assertion messages so a failure names the offending pair.
        final String info = from + " ➡︎ " + to;

        // "und" is the placeholder for an unspecified language, so there is nothing to match.
        final String explicitLanguage = source.getLanguage();
        if (!explicitLanguage.equals("und")) {
            assertEquals("Language: " + info, explicitLanguage, target.getLanguage());
        }

        // An empty script means the source did not specify one.
        final String explicitScript = source.getScript();
        if (!explicitScript.isEmpty()) {
            assertEquals("Script: " + info, explicitScript, target.getScript());
        }

        // An empty region means the source did not specify one.
        final String explicitRegion = source.getRegion();
        if (!explicitRegion.isEmpty()) {
            assertEquals("Region: " + info, explicitRegion, target.getRegion());
        }
    }
}
}
Loading
Loading