Skip to content

Commit

Permalink
CLDR-17897 Stabilize LikelySubtags by adding missing primary scripts…
Browse files Browse the repository at this point in the history
… and likely subtag overrides

The generated files for ConvertLanguageData and GenerateLikelySubtags change if input files are modified. This change seeks to stabilize the scripts' outputs.

CLDR-17897 Add overrides to Likely Subtags
  • Loading branch information
conradarcturus committed Aug 29, 2024
1 parent 5f091da commit e5fa96d
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 16 deletions.
11 changes: 8 additions & 3 deletions common/supplemental/likelySubtags.xml
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ not be patched by hand, as any changes made in that fashion may be lost.
<likelySubtag from="ha_CM" to="ha_Arab_CM"/> <!--Hausa‧?‧Cameroon ➡ Hausa‧Arabic‧Cameroon-->
<likelySubtag from="ha_SD" to="ha_Arab_SD"/> <!--Hausa‧?‧Sudan ➡ Hausa‧Arabic‧Sudan-->
<likelySubtag from="hak" to="hak_Hans_CN"/> <!--Hakka Chinese‧?‧? ➡ Hakka Chinese‧Simplified‧China-->
<likelySubtag from="hak_Hant" to="hak_Hant_TW"/> <!--Hakka Chinese‧Traditional‧? ➡ Hakka Chinese‧Traditional‧Taiwan-->
<likelySubtag from="haw" to="haw_Latn_US"/> <!--Hawaiian‧?‧? ➡ Hawaiian‧Latin‧United States-->
<likelySubtag from="haz" to="haz_Arab_AF"/> <!--Hazaragi‧?‧? ➡ Hazaragi‧Arabic‧Afghanistan-->
<likelySubtag from="he" to="he_Hebr_IL"/> <!--Hebrew‧?‧? ➡ Hebrew‧Hebrew‧Israel-->
Expand Down Expand Up @@ -434,6 +435,7 @@ not be patched by hand, as any changes made in that fashion may be lost.
<likelySubtag from="lwl" to="lwl_Thai_TH"/> <!--Eastern Lawa‧?‧? ➡ Eastern Lawa‧Thai‧Thailand-->
<likelySubtag from="lzh" to="lzh_Hans_CN"/> <!--Literary Chinese‧?‧? ➡ Literary Chinese‧Simplified‧China-->
<likelySubtag from="lzz" to="lzz_Latn_TR"/> <!--Laz‧?‧? ➡ Laz‧Latin‧Türkiye-->
<likelySubtag from="lzz_Geor" to="lzz_Geor_GE"/> <!--Laz‧Georgian‧? ➡ Laz‧Georgian‧Georgia-->
<likelySubtag from="mad" to="mad_Latn_ID"/> <!--Madurese‧?‧? ➡ Madurese‧Latin‧Indonesia-->
<likelySubtag from="maf" to="maf_Latn_CM"/> <!--Mafa‧?‧? ➡ Mafa‧Latin‧Cameroon-->
<likelySubtag from="mag" to="mag_Deva_IN"/> <!--Magahi‧?‧? ➡ Magahi‧Devanagari‧India-->
Expand Down Expand Up @@ -498,6 +500,7 @@ not be patched by hand, as any changes made in that fashion may be lost.
<likelySubtag from="mzn" to="mzn_Arab_IR"/> <!--Mazanderani‧?‧? ➡ Mazanderani‧Arabic‧Iran-->
<likelySubtag from="na" to="na_Latn_NR"/> <!--Nauru‧?‧? ➡ Nauru‧Latin‧Nauru-->
<likelySubtag from="nan" to="nan_Hans_CN"/> <!--Min Nan Chinese‧?‧? ➡ Min Nan Chinese‧Simplified‧China-->
<likelySubtag from="nan_Hant" to="nan_Hant_TW"/> <!--Min Nan Chinese‧Traditional‧? ➡ Min Nan Chinese‧Traditional‧Taiwan-->
<likelySubtag from="nap" to="nap_Latn_IT"/> <!--Neapolitan‧?‧? ➡ Neapolitan‧Latin‧Italy-->
<likelySubtag from="naq" to="naq_Latn_NA"/> <!--Nama‧?‧? ➡ Nama‧Latin‧Namibia-->
<likelySubtag from="nb" to="nb_Latn_NO"/> <!--Norwegian Bokmål‧?‧? ➡ Norwegian Bokmål‧Latin‧Norway-->
Expand Down Expand Up @@ -703,10 +706,10 @@ not be patched by hand, as any changes made in that fashion may be lost.
<likelySubtag from="tiv" to="tiv_Latn_NG"/> <!--Tiv‧?‧? ➡ Tiv‧Latin‧Nigeria-->
<likelySubtag from="tk" to="tk_Latn_TM"/> <!--Turkmen‧?‧? ➡ Turkmen‧Latin‧Turkmenistan-->
<likelySubtag from="tkl" to="tkl_Latn_TK"/> <!--Tokelau‧?‧? ➡ Tokelau‧Latin‧Tokelau-->
<likelySubtag from="tkr" to="tkr_Latn_AZ"/> <!--Tsakhur‧?‧? ➡ Tsakhur‧Latin‧Azerbaijan-->
<likelySubtag from="tkr" to="tkr_Cyrl_AZ"/> <!--Tsakhur‧?‧? ➡ Tsakhur‧Cyrillic‧Azerbaijan-->
<likelySubtag from="tkt" to="tkt_Deva_NP"/> <!--Kathoriya Tharu‧?‧? ➡ Kathoriya Tharu‧Devanagari‧Nepal-->
<likelySubtag from="tl" to="tl_Latn_PH"/> <!--Tagalog‧?‧? ➡ Tagalog‧Latin‧Philippines-->
<likelySubtag from="tly" to="tly_Latn_AZ"/> <!--Talysh‧?‧? ➡ Talysh‧Latin‧Azerbaijan-->
<likelySubtag from="tly" to="tly_Arab_AZ"/> <!--Talysh‧?‧? ➡ Talysh‧Arabic‧Azerbaijan-->
<likelySubtag from="tmh" to="tmh_Latn_NE"/> <!--Tamashek‧?‧? ➡ Tamashek‧Latin‧Niger-->
<likelySubtag from="tn" to="tn_Latn_ZA"/> <!--Tswana‧?‧? ➡ Tswana‧Latin‧South Africa-->
<likelySubtag from="tnr" to="tnr_Latn_SN"/> <!--Ménik‧?‧? ➡ Ménik‧Latin‧Senegal-->
Expand All @@ -725,7 +728,7 @@ not be patched by hand, as any changes made in that fashion may be lost.
<likelySubtag from="tt" to="tt_Cyrl_RU"/> <!--Tatar‧?‧? ➡ Tatar‧Cyrillic‧Russia-->
<likelySubtag from="ttj" to="ttj_Latn_UG"/> <!--Tooro‧?‧? ➡ Tooro‧Latin‧Uganda-->
<likelySubtag from="tts" to="tts_Thai_TH"/> <!--Northeastern Thai‧?‧? ➡ Northeastern Thai‧Thai‧Thailand-->
<likelySubtag from="ttt" to="ttt_Latn_AZ"/> <!--Muslim Tat‧?‧? ➡ Muslim Tat‧Latin‧Azerbaijan-->
<likelySubtag from="ttt" to="ttt_Cyrl_AZ"/> <!--Muslim Tat‧?‧? ➡ Muslim Tat‧Cyrillic‧Azerbaijan-->
<likelySubtag from="tum" to="tum_Latn_MW"/> <!--Tumbuka‧?‧? ➡ Tumbuka‧Latin‧Malawi-->
<likelySubtag from="tvl" to="tvl_Latn_TV"/> <!--Tuvalu‧?‧? ➡ Tuvalu‧Latin‧Tuvalu-->
<likelySubtag from="twq" to="twq_Latn_NE"/> <!--Tasawaq‧?‧? ➡ Tasawaq‧Latin‧Niger-->
Expand Down Expand Up @@ -1036,6 +1039,7 @@ not be patched by hand, as any changes made in that fashion may be lost.
<likelySubtag from="und_Ahom" to="aho_Ahom_IN"/> <!--?‧Ahom‧? ➡ Ahom‧Ahom‧India-->
<likelySubtag from="und_Arab" to="ar_Arab_EG"/> <!--?‧Arabic‧? ➡ Arabic‧Arabic‧Egypt-->
<likelySubtag from="und_Arab_AF" to="fa_Arab_AF"/> <!--?‧Arabic‧Afghanistan ➡ Persian‧Arabic‧Afghanistan-->
<likelySubtag from="und_Arab_AZ" to="tly_Arab_AZ"/> <!--?‧Arabic‧Azerbaijan ➡ Talysh‧Arabic‧Azerbaijan-->
<likelySubtag from="und_Arab_BN" to="ms_Arab_BN"/> <!--?‧Arabic‧Brunei ➡ Malay‧Arabic‧Brunei-->
<likelySubtag from="und_Arab_CC" to="ms_Arab_CC"/> <!--?‧Arabic‧Cocos (Keeling) Islands ➡ Malay‧Arabic‧Cocos (Keeling) Islands-->
<likelySubtag from="und_Arab_CN" to="ug_Arab_CN"/> <!--?‧Arabic‧China ➡ Uyghur‧Arabic‧China-->
Expand Down Expand Up @@ -1131,6 +1135,7 @@ not be patched by hand, as any changes made in that fashion may be lost.
<likelySubtag from="und_Hani" to="zh_Hani_CN"/> <!--?‧Han‧? ➡ Chinese‧Han‧China-->
<likelySubtag from="und_Hano" to="hnn_Hano_PH"/> <!--?‧Hanunoo‧? ➡ Hanunoo‧Hanunoo‧Philippines-->
<likelySubtag from="und_Hans" to="zh_Hans_CN"/> <!--?‧Simplified‧? ➡ Chinese‧Simplified‧China-->
<likelySubtag from="und_Hans_TW" to="nan_Hans_TW"/> <!--?‧Simplified‧Taiwan ➡ Min Nan Chinese‧Simplified‧Taiwan-->
<likelySubtag from="und_Hant" to="zh_Hant_TW"/> <!--?‧Traditional‧? ➡ Chinese‧Traditional‧Taiwan-->
<likelySubtag from="und_Hant_CA" to="yue_Hant_CA"/> <!--?‧Traditional‧Canada ➡ Cantonese‧Traditional‧Canada-->
<likelySubtag from="und_Hant_CN" to="yue_Hant_CN"/> <!--?‧Traditional‧China ➡ Cantonese‧Traditional‧China-->
Expand Down
23 changes: 14 additions & 9 deletions common/supplemental/supplementalData.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1647,7 +1647,7 @@ XXX Code for transations where no currency is involved
<language type="ha" scripts="Arab Latn"/>
<language type="ha" territories="NE NG" alt="secondary"/>
<language type="hai" scripts="Latn"/>
<language type="hak" scripts="Hans"/>
<language type="hak" scripts="Hans Hant" territories="TW"/>
<language type="hak" territories="CN" alt="secondary"/>
<language type="haw" scripts="Latn"/>
<language type="haw" territories="US" alt="secondary"/>
Expand Down Expand Up @@ -1890,7 +1890,7 @@ XXX Code for transations where no currency is involved
<language type="lv" scripts="Latn" territories="LV"/>
<language type="lwl" scripts="Thai"/>
<language type="lzh" scripts="Hans" alt="secondary"/>
<language type="lzz" scripts="Latn Geor"/>
<language type="lzz" scripts="Geor Latn"/>
<language type="mad" scripts="Latn"/>
<language type="mad" territories="ID" alt="secondary"/>
<language type="maf" scripts="Latn"/>
Expand Down Expand Up @@ -1979,7 +1979,7 @@ XXX Code for transations where no currency is involved
<language type="mzn" scripts="Arab"/>
<language type="mzn" territories="IR" alt="secondary"/>
<language type="na" scripts="Latn" territories="NR"/>
<language type="nan" scripts="Hans"/>
<language type="nan" scripts="Hans Hant" territories="TW"/>
<language type="nan" territories="CN" alt="secondary"/>
<language type="nap" scripts="Latn"/>
<language type="naq" scripts="Latn"/>
Expand Down Expand Up @@ -2070,14 +2070,15 @@ XXX Code for transations where no currency is involved
<language type="peo" scripts="Xpeo" alt="secondary"/>
<language type="pfl" scripts="Latn"/>
<language type="phn" scripts="Phnx" alt="secondary"/>
<language type="pi" scripts="Deva Sinh Thai" alt="secondary"/>
<language type="pi" scripts="Deva Mymr Sinh Thai" alt="secondary"/>
<language type="pis" scripts="Latn"/>
<language type="pis" territories="SB" alt="secondary"/>
<language type="pko" scripts="Latn"/>
<language type="pl" scripts="Latn" territories="PL"/>
<language type="pl" territories="GB" alt="secondary"/>
<language type="pms" scripts="Latn"/>
<language type="pnt" scripts="Grek Cyrl Latn"/>
<language type="pnt" scripts="Grek"/>
<language type="pnt" scripts="Cyrl Latn" alt="secondary"/>
<language type="pon" scripts="Latn"/>
<language type="pon" territories="FM" alt="secondary"/>
<language type="pqm" scripts="Latn"/>
Expand Down Expand Up @@ -2277,10 +2278,10 @@ XXX Code for transations where no currency is involved
<language type="tk" scripts="Arab Cyrl Latn" territories="TM"/>
<language type="tk" territories="AF IR" alt="secondary"/>
<language type="tkl" scripts="Latn" territories="TK"/>
<language type="tkr" scripts="Latn Cyrl"/>
<language type="tkr" scripts="Cyrl Latn"/>
<language type="tkt" scripts="Deva"/>
<language type="tli" scripts="Latn"/>
<language type="tly" scripts="Latn Arab Cyrl"/>
<language type="tly" scripts="Arab Cyrl Latn"/>
<language type="tly" territories="AZ" alt="secondary"/>
<language type="tmh" scripts="Latn"/>
<language type="tmh" territories="NE" alt="secondary"/>
Expand Down Expand Up @@ -2309,7 +2310,7 @@ XXX Code for transations where no currency is involved
<language type="ttj" scripts="Latn"/>
<language type="tts" scripts="Thai"/>
<language type="tts" territories="TH" alt="secondary"/>
<language type="ttt" scripts="Latn Cyrl"/>
<language type="ttt" scripts="Cyrl Latn"/>
<language type="ttt" scripts="Arab" alt="secondary"/>
<language type="tum" scripts="Latn"/>
<language type="tum" territories="MW" alt="secondary"/>
Expand Down Expand Up @@ -4226,7 +4227,6 @@ XXX Code for transations where no currency is involved
<languagePopulation type="ku" populationPercent="5.5"/> <!--Kurdish-->
<languagePopulation type="apc" populationPercent="5.2" references="R1173"/> <!--Levantine Arabic-->
<languagePopulation type="zza" populationPercent="1.4"/> <!--Zaza-->
<languagePopulation type="kaa" populationPercent="0.1" references="R1199"/> <!--Kara-Kalpak-->
<languagePopulation type="kbd" populationPercent="0.77"/> <!--Kabardian-->
<languagePopulation type="az" populationPercent="0.74"/> <!--Azerbaijani-->
<languagePopulation type="az_Arab" populationPercent="0.65"/> <!--Azerbaijani (Arabic)-->
Expand All @@ -4235,6 +4235,7 @@ XXX Code for transations where no currency is involved
<languagePopulation type="bg" populationPercent="0.42"/> <!--Bulgarian-->
<languagePopulation type="ady" populationPercent="0.39"/> <!--Adyghe-->
<languagePopulation type="kiu" populationPercent="0.19"/> <!--Kirmanjki-->
<languagePopulation type="kaa" populationPercent="0.1" references="R1199"/> <!--Kara-Kalpak-->
<languagePopulation type="hy" populationPercent="0.056"/> <!--Armenian-->
<languagePopulation type="ka" populationPercent="0.056"/> <!--Georgian-->
<languagePopulation type="sr_Latn" writingPercent="5" populationPercent="0.028" references="R1017"/> <!--Serbian (Latin)-->
Expand All @@ -4257,6 +4258,8 @@ XXX Code for transations where no currency is involved
</territory>
<territory type="TW" gdp="1143000000000" literacyPercent="96.1" population="23595300"> <!--Taiwan-->
<languagePopulation type="zh_Hant" populationPercent="95" officialStatus="official"/> <!--Chinese (Traditional)-->
<languagePopulation type="nan" populationPercent="57" officialStatus="official" references="R1219"/> <!--Min Nan Chinese-->
<languagePopulation type="hak" populationPercent="11" officialStatus="official" references="R1333"/> <!--Hakka Chinese-->
<languagePopulation type="trv" populationPercent="0.02"/> <!--Taroko-->
</territory>
<territory type="TZ" gdp="234100000000" literacyPercent="67.8" population="67462100"> <!--Tanzania-->
Expand Down Expand Up @@ -5692,6 +5695,7 @@ XXX Code for transations where no currency is involved
<reference type="R1216">This is base pop for &quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;fub&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot;&quot; lang code; ff shows as a macrolanguage</reference>
<reference type="R1217" uri="http://www.ethnologue.com/language/bkm">[missing]</reference>
<reference type="R1218" uri="http://en.wikipedia.org/wiki/Vietnamese_language">(could be higher if 2nd lang included; no data yet)</reference>
<reference type="R1219" uri="https://en.wikipedia.org/wiki/Taiwanese_Hokkien">[missing]</reference>
<reference type="R1220" uri="http://www.ethnologue.com/18/language/knf/">[missing]</reference>
<reference type="R1221" uri="https://www.cia.gov/library/publications/the-world-factbook/geos/cc.html">[missing]</reference>
<reference type="R1222" uri="http://www.ethnologue.com/show_language.asp?code=dsb">pop 7k. Figure is questionable writing pop artificially set to 5% see also http://en.wikipedia.org/wiki/Lower_Sorbian</reference>
Expand Down Expand Up @@ -5805,5 +5809,6 @@ XXX Code for transations where no currency is involved
<reference type="R1330" uri="https://en.wikipedia.org/wiki/Languages_of_the_United_Kingdom">Analyzed from 2011 UK census and other sources</reference>
<reference type="R1331" uri="https://en.wikipedia.org/wiki/Languages_of_Canada">In total 86.2% of Canadians have working knowledge of English while 29.8% have a working knowledge of French.</reference>
<reference type="R1332" uri="https://statisticsmaldives.gov.mv/statistical-release-iii-education">2014 Maldives: 98% literacy in Divehi, 75% in English</reference>
<reference type="R1333" uri="https://en.wikipedia.org/wiki/Taiwanese_Hakka">[missing]</reference>
</references>
</supplementalData>
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,7 @@ public static void main(String[] args) throws IOException {
{"ff_NG", "ff_Latn_NG"},
{"ff_SL", "ff_Latn_SL"},
{"ff_Adlm", "ff_Adlm_GN"},
{"hak_Hant", "hak_Hant_TW"},
{"ia", "ia_Latn_001"},
{"ia_Latn", "ia_Latn_001"},
{"io", "io_Latn_001"},
Expand All @@ -392,6 +393,9 @@ public static void main(String[] args) throws IOException {
{"mro", "mro_Mroo_BD"},
{"mro_BD", "mro_Mroo_BD"},
{"ms_Arab", "ms_Arab_MY"},
{"nan", "nan_Hans_CN"},
{"nan_Hans", "nan_Hans_CN"},
{"nan_Hant", "nan_Hant_TW"},
{"pap", "pap_Latn_CW"},
{"pap_Latn", "pap_Latn_CW"},
{
Expand Down Expand Up @@ -469,12 +473,14 @@ public static void main(String[] args) throws IOException {
// {"cr", "cr_Cans_CA"},
// {"hif", "hif_Latn_FJ"},
// {"gon", "gon_Telu_IN"},
// {"lzz", "lzz_Latn_TR"},
{"lzz", "lzz_Latn_TR"},
{"lzz_TR", "lzz_Latn_TR"},
{"lzz_Geor", "lzz_Geor_GE"},
// {"lif", "lif_Deva_NP"},
// {"unx", "unx_Beng_IN"},
// {"unr", "unr_Beng_IN"},
// {"ttt", "ttt_Latn_AZ"},
// {"pnt", "pnt_Grek_GR"},
// {"pnt", "pnt_Grek_GR"},
// {"tly", "tly_Latn_AZ"},
// {"tkr", "tkr_Latn_AZ"},
// {"bsq", "bsq_Bass_LR"},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@ ha Hausa primary Arab Arabic
ha Hausa primary Latn Latin
hai Haida primary Latn Latin
hak Hakka Chinese primary Hans Simplified
hak Hakka Chinese primary Hant Traditional
haw Hawaiian primary Latn Latin
haz Hazaragi primary Arab Arabic
he Hebrew primary Hebr Hebrew
Expand Down Expand Up @@ -564,6 +565,7 @@ myz Classical Mandaic secondary Mand Mandaean
mzn Mazanderani primary Arab Arabic
na Nauru primary Latn Latin
nan Min Nan Chinese primary Hans Simplified
nan Min Nan Chinese primary Hant Traditional
nap Neapolitan primary Latn Latin
naq Nama primary Latn Latin
nb Norwegian (Bokmål) primary Latn Latin
Expand Down Expand Up @@ -633,16 +635,17 @@ pdt Plautdietsch primary Latn Latin
peo Old Persian secondary Xpeo Old Persian
pfl Palatine German primary Latn Latin
phn Phoenician secondary Phnx Phoenician
pi Pali primary Mymr Myanmar
pi Pali secondary Deva Devanagari
pi Pali secondary Sinh Sinhala
pi Pali secondary Thai Thai
pis Pijin primary Latn Latin
pko Pökoot primary Latn Latin
pl Polish primary Latn Latin
pms Piedmontese primary Latn Latin
pnt Pontic primary Cyrl Cyrillic
pnt Pontic primary Grek Greek
pnt Pontic primary Latn Latin
pnt Pontic secondary Cyrl Cyrillic
pnt Pontic secondary Latn Latin
pon Pohnpeian primary Latn Latin
pqm Malecite primary Latn Latin
prd Parsi-Dari primary Arab Arabic
Expand Down

0 comments on commit e5fa96d

Please sign in to comment.