From e5fa96d83d534178399242dd4be3613ee9deaca9 Mon Sep 17 00:00:00 2001 From: Conrad Nied Date: Thu, 29 Aug 2024 12:38:02 -0700 Subject: [PATCH] CLDR-17897 Stabilize LikelySubtags by adding missing primary scripts and likely subtag overrides The generated files for ConvertLanguageData and GenerateLikelySubtags change if input files are modified. This change seeks to stabilize the scripts' outputs. CLDR-17897 Add overrides to Likely Subtags --- common/supplemental/likelySubtags.xml | 11 ++++++--- common/supplemental/supplementalData.xml | 23 +++++++++++-------- .../cldr/tool/GenerateLikelySubtags.java | 10 ++++++-- .../cldr/util/data/language_script.tsv | 7 ++++-- 4 files changed, 35 insertions(+), 16 deletions(-) diff --git a/common/supplemental/likelySubtags.xml b/common/supplemental/likelySubtags.xml index 3dc3523d626..69fee1f9757 100644 --- a/common/supplemental/likelySubtags.xml +++ b/common/supplemental/likelySubtags.xml @@ -261,6 +261,7 @@ not be patched by hand, as any changes made in that fashion may be lost. + @@ -434,6 +435,7 @@ not be patched by hand, as any changes made in that fashion may be lost. + @@ -498,6 +500,7 @@ not be patched by hand, as any changes made in that fashion may be lost. + @@ -703,10 +706,10 @@ not be patched by hand, as any changes made in that fashion may be lost. - + - + @@ -725,7 +728,7 @@ not be patched by hand, as any changes made in that fashion may be lost. - + @@ -1036,6 +1039,7 @@ not be patched by hand, as any changes made in that fashion may be lost. + @@ -1131,6 +1135,7 @@ not be patched by hand, as any changes made in that fashion may be lost. 
+ diff --git a/common/supplemental/supplementalData.xml b/common/supplemental/supplementalData.xml index 7ddde428e6c..d11deaae3b2 100644 --- a/common/supplemental/supplementalData.xml +++ b/common/supplemental/supplementalData.xml @@ -1647,7 +1647,7 @@ XXX Code for transations where no currency is involved - + @@ -1890,7 +1890,7 @@ XXX Code for transations where no currency is involved - + @@ -1979,7 +1979,7 @@ XXX Code for transations where no currency is involved - + @@ -2070,14 +2070,15 @@ XXX Code for transations where no currency is involved - + - + + @@ -2277,10 +2278,10 @@ XXX Code for transations where no currency is involved - + - + @@ -2309,7 +2310,7 @@ XXX Code for transations where no currency is involved - + @@ -4226,7 +4227,6 @@ XXX Code for transations where no currency is involved - @@ -4235,6 +4235,7 @@ XXX Code for transations where no currency is involved + @@ -4257,6 +4258,8 @@ XXX Code for transations where no currency is involved + + @@ -5692,6 +5695,7 @@ XXX Code for transations where no currency is involved This is base pop for """"""""""""""""""""""""""""""""fub"""""""""""""""""""""""""""""""" lang code; ff shows as a macrolanguage [missing] (could be higher if 2nd lang included; no data yet) + [missing] [missing] [missing] pop 7k. Figure is questionable writing pop artificially set to 5% see also http://en.wikipedia.org/wiki/Lower_Sorbian @@ -5805,5 +5809,6 @@ XXX Code for transations where no currency is involved Analyzed from 2011 UK census and other sources In total 86.2% of Canadians have working knowledge of English while 29.8% have a working knowledge of French. 
2014 Maldives: 98% literacy in Divehi, 75% in English + [missing] diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateLikelySubtags.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateLikelySubtags.java index 44dfa981b8b..e042db94948 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateLikelySubtags.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateLikelySubtags.java @@ -374,6 +374,7 @@ public static void main(String[] args) throws IOException { {"ff_NG", "ff_Latn_NG"}, {"ff_SL", "ff_Latn_SL"}, {"ff_Adlm", "ff_Adlm_GN"}, + {"hak_Hant", "hak_Hant_TW"}, {"ia", "ia_Latn_001"}, {"ia_Latn", "ia_Latn_001"}, {"io", "io_Latn_001"}, @@ -392,6 +393,9 @@ public static void main(String[] args) throws IOException { {"mro", "mro_Mroo_BD"}, {"mro_BD", "mro_Mroo_BD"}, {"ms_Arab", "ms_Arab_MY"}, + {"nan", "nan_Hans_CN"}, + {"nan_Hans", "nan_Hans_CN"}, + {"nan_Hant", "nan_Hant_TW"}, {"pap", "pap_Latn_CW"}, {"pap_Latn", "pap_Latn_CW"}, { @@ -469,12 +473,14 @@ public static void main(String[] args) throws IOException { // {"cr", "cr_Cans_CA"}, // {"hif", "hif_Latn_FJ"}, // {"gon", "gon_Telu_IN"}, - // {"lzz", "lzz_Latn_TR"}, + {"lzz", "lzz_Latn_TR"}, + {"lzz_TR", "lzz_Latn_TR"}, + {"lzz_Geor", "lzz_Geor_GE"}, // {"lif", "lif_Deva_NP"}, // {"unx", "unx_Beng_IN"}, // {"unr", "unr_Beng_IN"}, // {"ttt", "ttt_Latn_AZ"}, - // {"pnt", "pnt_Grek_GR"}, + // {"pnt", "pnt_Grek_GR"}, // {"tly", "tly_Latn_AZ"}, // {"tkr", "tkr_Latn_AZ"}, // {"bsq", "bsq_Bass_LR"}, diff --git a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/language_script.tsv b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/language_script.tsv index 19e8da361b2..a6ef6b029d6 100644 --- a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/language_script.tsv +++ b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/language_script.tsv @@ -297,6 +297,7 @@ ha Hausa primary Arab Arabic ha Hausa primary Latn 
Latin hai Haida primary Latn Latin hak Hakka Chinese primary Hans Simplified +hak Hakka Chinese primary Hant Traditional haw Hawaiian primary Latn Latin haz Hazaragi primary Arab Arabic he Hebrew primary Hebr Hebrew @@ -564,6 +565,7 @@ myz Classical Mandaic secondary Mand Mandaean mzn Mazanderani primary Arab Arabic na Nauru primary Latn Latin nan Min Nan Chinese primary Hans Simplified +nan Min Nan Chinese primary Hant Traditional nap Neapolitan primary Latn Latin naq Nama primary Latn Latin nb Norwegian (Bokmål) primary Latn Latin @@ -633,6 +635,7 @@ pdt Plautdietsch primary Latn Latin peo Old Persian secondary Xpeo Old Persian pfl Palatine German primary Latn Latin phn Phoenician secondary Phnx Phoenician +pi Pali primary Mymr Myanmar pi Pali secondary Deva Devanagari pi Pali secondary Sinh Sinhala pi Pali secondary Thai Thai @@ -640,9 +643,9 @@ pis Pijin primary Latn Latin pko Pökoot primary Latn Latin pl Polish primary Latn Latin pms Piedmontese primary Latn Latin -pnt Pontic primary Cyrl Cyrillic pnt Pontic primary Grek Greek -pnt Pontic primary Latn Latin +pnt Pontic secondary Cyrl Cyrillic +pnt Pontic secondary Latn Latin pon Pohnpeian primary Latn Latin pqm Malecite primary Latn Latin prd Parsi-Dari primary Arab Arabic