From 7c7495776e46ab6f9c1aaeec3cfd325c4e747214 Mon Sep 17 00:00:00 2001 From: Conrad Nied Date: Tue, 20 Aug 2024 10:21:36 -0700 Subject: [PATCH] CLDR-11888 Update French speakers https://unicode-org.atlassian.net/browse/CLDR-11888 was created to update the French speakers for Djibouti but while I was researching that I found many other Francophone countries that significantly underestimated French populations. Most of those gaps probably come from the number being L1 users but the point of this file is L1+L2 users -- basically how many people in each country could use an interface in this language. See the original data in: https://www.francophonie.org/sites/default/files/2021-04/LFDM-20Edition-2019-La-langue-fran%C3%A7aise-dans-le-monde.pdf mvn package -DskipTests=true java -jar tools/cldr-code/target/cldr-code.jar ConvertLanguageData java -jar tools/cldr-code/target/cldr-code.jar GenerateLikelySubtags CLDR-11888 Update French speakers https://unicode-org.atlassian.net/browse/CLDR-11888 was created to update the French speakers for Djibouti but while I was researching that I found many other Francophone countries that significantly underestimated French populations. Most of those gaps probably come from the number being L1 users but the point of this file is L1+L2 users -- basically how many people in each country could use an interface in this language. See the original data in: https://www.francophonie.org/sites/default/files/2021-04/LFDM-20Edition-2019-La-langue-fran%C3%A7aise-dans-le-monde.pdf mvn package -DskipTests=true java -jar tools/cldr-code/target/cldr-code.jar ConvertLanguageData java -jar tools/cldr-code/target/cldr-code.jar GenerateLikelySubtags CLDR-11888 Redo automated scripts after merge conflicts --- common/supplemental/likelySubtags.xml | 4 +-- common/supplemental/supplementalData.xml | 19 +++++++------- .../localeIdentifiers/likelySubtags.txt | 13 +++++++--- .../localeIdentifiers/localeDisplayName.txt | 26 +++++++++++++++++++ .../util/data/country_language_population.tsv | 14 +++++----- 5 files changed, 53 insertions(+), 23 deletions(-) diff --git a/common/supplemental/likelySubtags.xml b/common/supplemental/likelySubtags.xml index 42eb17c054b..ce418366c17 100644 --- a/common/supplemental/likelySubtags.xml +++ b/common/supplemental/likelySubtags.xml @@ -895,7 +895,7 @@ not be patched by hand, as any changes made in that fashion may be lost. - + @@ -924,7 +924,7 @@ not be patched by hand, as any changes made in that fashion may be lost. - + diff --git a/common/supplemental/supplementalData.xml b/common/supplemental/supplementalData.xml index 7e87532db12..4eac2684ba4 100644 --- a/common/supplemental/supplementalData.xml +++ b/common/supplemental/supplementalData.xml @@ -1574,7 +1574,7 @@ XXX Code for transations where no currency is involved - + @@ -2739,9 +2739,9 @@ XXX Code for transations where no currency is involved + - @@ -2899,10 +2899,10 @@ XXX Code for transations where no currency is involved + - @@ -3188,7 +3188,7 @@ XXX Code for transations where no currency is involved - + @@ -3495,9 +3495,9 @@ XXX Code for transations where no currency is involved + - @@ -3554,8 +3554,8 @@ XXX Code for transations where no currency is involved + - @@ -3659,10 +3659,10 @@ XXX Code for transations where no currency is involved + - @@ -4225,7 +4225,7 @@ XXX Code for transations where no currency is involved - + @@ -5517,7 +5517,7 @@ XXX Code for transations where no currency is involved Many minor langs; Portuguese official In this and other sources, such as Ethnologue, there is no estimate for number of users. http://en.wikipedia.org/wiki/Filipino_language http://www.ethnologue.com/show_language.asp?code=fil Most of the population uses Creole; see also http://www.country-studies.com/haiti/creole,-literacy,-and-education.html http://en.wikipedia.org/wiki/French_language#Haiti - 400k 2nd language speakers + [missing] Official language, 37-77% literacy Official language, used in some schools. http://www.censusindia.net/cendat/datatable26.html @@ -5619,7 +5619,6 @@ XXX Code for transations where no currency is involved [missing] - Education is in French; using literacy rate * pop for French-using population English is the first language learned by half the children by the time they reach preschool age; using 92.6% of pop for the English figure - - using pop * literacy rate - 90 percent of approximately 39 million Tanzanians speak Swahili - Baganda generally don't speak Swahili, but it is in common use among the 25 million people elsewhere in the country, and is currently being implemented in schools nationwide (use 75% of Cpop for this figure) [missing] diff --git a/common/testData/localeIdentifiers/likelySubtags.txt b/common/testData/localeIdentifiers/likelySubtags.txt index aca47b1a577..d78ef667ce9 100644 --- a/common/testData/localeIdentifiers/likelySubtags.txt +++ b/common/testData/localeIdentifiers/likelySubtags.txt @@ -512,6 +512,11 @@ hsb-AQ ; hsb-Latn-AQ ; hsb-AQ ; hsb-DE ; hsb-Latn-DE ; hsb ; hsb-Egyp ; hsb-Egyp-DE ; hsb-Egyp ; hsb-Latn ; hsb-Latn-DE ; hsb ; +ht ; ht-Latn-HT ; ht ; +ht-AQ ; ht-Latn-AQ ; ht-AQ ; +ht-Egyp ; ht-Egyp-HT ; ht-Egyp ; +ht-HT ; ht-Latn-HT ; ht ; +ht-Latn ; ht-Latn-HT ; ht ; hu ; hu-Latn-HU ; hu ; hu-AQ ; hu-Latn-AQ ; hu-AQ ; hu-Egyp ; hu-Egyp-HU ; hu-Egyp ; @@ -1173,7 +1178,7 @@ und-Cyrl-UZ ; uz-Cyrl-UZ ; uz-Cyrl ; und-Cyrl-XK ; sr-Cyrl-XK ; sr-XK ; und-DE ; de-Latn-DE ; de ; und-DG ; en-Latn-DG ; en-DG ; -und-DJ ; aa-Latn-DJ ; aa-DJ ; +und-DJ ; fr-Latn-DJ ; fr-DJ ; und-DK ; da-Latn-DK ; da ; und-DM ; en-Latn-DM ; en-DM ; und-DO ; es-Latn-DO ; es-DO ; @@ -1236,7 +1241,7 @@ und-Guru-IN ; pa-Guru-IN ; pa ; und-HK ; zh-Hant-HK ; zh-HK ; und-HN ; es-Latn-HN ; es-HN ; und-HR ; hr-Latn-HR ; hr ; -und-HT ; ht-Latn-HT ; ht ; +und-HT ; fr-Latn-HT ; fr-HT ; und-HU ; hu-Latn-HU ; hu ; und-Hans ; zh-Hans-CN ; zh ; und-Hans-AQ ; zh-Hans-AQ ; zh-AQ ; @@ -1365,7 +1370,7 @@ und-Latn-CY ; tr-Latn-CY ; tr-CY ; und-Latn-CZ ; cs-Latn-CZ ; cs ; und-Latn-DE ; de-Latn-DE ; de ; und-Latn-DG ; en-Latn-DG ; en-DG ; -und-Latn-DJ ; aa-Latn-DJ ; aa-DJ ; +und-Latn-DJ ; fr-Latn-DJ ; fr-DJ ; und-Latn-DK ; da-Latn-DK ; da ; und-Latn-DM ; en-Latn-DM ; en-DM ; und-Latn-DO ; es-Latn-DO ; es-DO ; @@ -1401,7 +1406,7 @@ und-Latn-GY ; en-Latn-GY ; en-GY ; und-Latn-HK ; en-Latn-HK ; en-HK ; und-Latn-HN ; es-Latn-HN ; es-HN ; und-Latn-HR ; hr-Latn-HR ; hr ; -und-Latn-HT ; ht-Latn-HT ; ht ; +und-Latn-HT ; fr-Latn-HT ; fr-HT ; und-Latn-HU ; hu-Latn-HU ; hu ; und-Latn-IC ; es-Latn-IC ; es-IC ; und-Latn-ID ; id-Latn-ID ; id ; diff --git a/common/testData/localeIdentifiers/localeDisplayName.txt b/common/testData/localeIdentifiers/localeDisplayName.txt index 9abbc13423c..39e14107e8f 100644 --- a/common/testData/localeIdentifiers/localeDisplayName.txt +++ b/common/testData/localeIdentifiers/localeDisplayName.txt @@ -1310,6 +1310,32 @@ nl-Latn-BE; flamšćina (łaćonsce) zh-Hans-fonipa; chinšćina [zjednorjena] (FONIPA) +@locale=ht +@languageDisplay=standard + +en-MM; anglais (Myanmar [Birmanie]) +es; espagnol +es-419; espagnol (Amérique latine) +es-Cyrl-MX; espagnol (cyrillique, Mexique) +hi-Latn; hindi (latin) +nl-BE; néerlandais (Belgique) +nl-Latn-BE; néerlandais (latin, Belgique) +zh-Hans-fonipa; chinois (simplifié, alphabet phonétique international) + + +@locale=ht +@languageDisplay=dialect + +en-MM; anglais (Myanmar [Birmanie]) +es; espagnol +es-419; espagnol d’Amérique latine +es-Cyrl-MX; espagnol du Mexique (cyrillique) +hi-Latn; hindi (latin) +nl-BE; flamand +nl-Latn-BE; flamand (latin) +zh-Hans-fonipa; chinois simplifié (alphabet phonétique international) + + @locale=hu @languageDisplay=standard diff --git a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/country_language_population.tsv b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/country_language_population.tsv index 7ed8c346c61..36da53753c2 100644 --- a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/country_language_population.tsv +++ b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/country_language_population.tsv @@ -336,7 +336,7 @@ Denmark DK "5,809,502" 99% "287,800,000,000" Swedish sv 13% No estimate avail Diego Garcia DG 500 99% "22,460,000" de_facto_official English en 495 http://en.wikipedia.org/wiki/Diego_Garcia Djibouti DJ "884,017" 68% "3,640,000,000" Afar aa 42% Djibouti DJ "884,017" 68% "3,640,000,000" official Arabic ar 7.3% -Djibouti DJ "884,017" 68% "3,640,000,000" official French fr "19,000" +Djibouti DJ "884,017" 68% "3,640,000,000" official French fr 50% https://www.francophonie.org/sites/default/files/2021-04/LFDM-20Edition-2019-La-langue-fran%C3%A7aise-dans-le-monde.pdf Djibouti DJ "884,017" 68% "3,640,000,000" Somali so 41% Dominica DM "74,027" 94% "783,000,000" official English en 94% Dominican Republic DO "10,298,756" 90% "173,000,000,000" English en "8,000" @@ -510,7 +510,7 @@ Guinea-Bissau GW "1,833,247" 55% "3,171,000,000" Fulah (Adlam) ff_Adlm 1 No e Guinea-Bissau GW "1,833,247" 55% "3,171,000,000" Mankanya knf "47,900" http://www.ethnologue.com/18/language/knf/ Guinea-Bissau GW "1,833,247" 55% "3,171,000,000" official Portuguese pt 100% https://www.cia.gov/cia/publications/factbook/geos/pu.html Many minor langs; Portuguese official Guyana GY "740,685" 92% "6,301,000,000" official English en 100% -Haiti HT "10,788,440" 49% "19,970,000,000" official French fr 4.7% 100% http://www.ethnologue.com/show_language.asp?code=fra 400k 2nd language speakers +Haiti HT "10,788,440" 49% "19,970,000,000" official French fr 42% 100% https://www.francophonie.org/sites/default/files/2021-04/LFDM-20Edition-2019-La-langue-fran%C3%A7aise-dans-le-monde.pdf Haiti HT "10,788,440" 49% "19,970,000,000" official Haitian Creole ht 81% "http://www.ethnologue.com/show_language.asp?code=hat Most of the population uses Creole; see also http://www.country-studies.com/haiti/creole,-literacy,-and-education.html http://en.wikipedia.org/wiki/French_language#Haiti" Heard & McDonald Islands HM 1 99% "53,170" Unknown language und 1 100% "https://www.cia.gov/cia/publications/factbook/geos/hm.html Uninhabited, barren, sub-Antarctic islands" Honduras HN "9,182,766" 85% "46,300,000,000" English en "40,400" @@ -762,7 +762,7 @@ Latvia LV "1,923,559" 100% "54,020,000,000" Russian ru 38% Lebanon LB "6,100,075" 90% "88,250,000,000" official Arabic ar 86% Lebanon LB "6,100,075" 90% "88,250,000,000" Armenian hy 5.2% Lebanon LB "6,100,075" 90% "88,250,000,000" English en 40% -Lebanon LB "6,100,075" 90% "88,250,000,000" French fr "22,300" +Lebanon LB "6,100,075" 90% "88,250,000,000" French fr 38% https://www.francophonie.org/sites/default/files/2021-04/LFDM-20Edition-2019-La-langue-fran%C3%A7aise-dans-le-monde.pdf Lebanon LB "6,100,075" 90% "88,250,000,000" Kurdish (Arabic) ku_Arab "101,000" Lebanon LB "6,100,075" 90% "88,250,000,000" Levantine Arabic apc "6,759,000" https://en.wikipedia.org/wiki/Levantine_Arabic#Speakers_by_country Lesotho LS "1,962,461" 90% "6,656,000,000" official English en 27% "http://www.ethnologue.com/show_country.asp?name=LS Lesotho English-using pop estimated at 5%, no figs available. Probably too low." @@ -844,7 +844,7 @@ Mauritania MR "3,840,429" 59% "17,280,000,000" Fulah (Adlam) ff_Adlm 1 No est Mauritania MR "3,840,429" 59% "17,280,000,000" Wolof wo "10,000" Mauritius MU "1,364,283" 89% "28,270,000,000" Bhojpuri bho 27% Mauritius MU "1,364,283" 89% "28,270,000,000" official English en 72% http://www.chass.utoronto.ca/~cpercy/courses/6362-chiba.htm -Mauritius MU "1,364,283" 89% "28,270,000,000" official French fr 3% +Mauritius MU "1,364,283" 89% "28,270,000,000" official French fr 73% https://www.francophonie.org/sites/default/files/2021-04/LFDM-20Edition-2019-La-langue-fran%C3%A7aise-dans-le-monde.pdf Mauritius MU "1,364,283" 89% "28,270,000,000" Morisyen mfe 90% Mauritius MU "1,364,283" 89% "28,270,000,000" Tamil ta "34,200" Mauritius MU "1,364,283" 89% "28,270,000,000" Urdu ur 5.2% @@ -885,7 +885,7 @@ Montserrat MS "5,315" 97% "167,400,000" official English en "3,490" Morocco MA "34,314,130" 67% "298,600,000,000" official Arabic ar 62% Morocco MA "34,314,130" 67% "298,600,000,000" official Central Atlas Tamazight tzm 9.8% 25% Morocco MA "34,314,130" 67% "298,600,000,000" English en 14% "http://www.ethnologue.com/show_country.asp?name=MA Ethnologue says 80k users of French. No other figures found yet, but this seems too low." -Morocco MA "34,314,130" 67% "298,600,000,000" de_facto_official French fr 20% "http://www.ethnologue.com/show_country.asp?name=MA Ethnologue says 80k users of French. No other figures found yet, but this seems too low." +Morocco MA "34,314,130" 67% "298,600,000,000" de_facto_official French fr 35% https://www.francophonie.org/sites/default/files/2021-04/LFDM-20Edition-2019-La-langue-fran%C3%A7aise-dans-le-monde.pdf Morocco MA "34,314,130" 67% "298,600,000,000" Moroccan Arabic ary 87% Morocco MA "34,314,130" 67% "298,600,000,000" Riffian (Tifinagh) rif_Tfng 4.9% 5% Morocco MA "34,314,130" 67% "298,600,000,000" Riffian (Latin) rif 4.9% 5% @@ -1281,7 +1281,7 @@ Sweden SE "10,040,995" 99% "518,000,000,000" recognized Tavringer Romani rmu "9, Sweden SE "10,040,995" 99% "518,000,000,000" recognized Tornedalen Finnish fit "55,500" Sweden SE "10,040,995" 99% "518,000,000,000" recognized Yiddish yi "3,000" Switzerland CH "8,292,809" 99% "523,100,000,000" English en 61% http://en.wikipedia.org/wiki/French_language -Switzerland CH "8,292,809" 99% "523,100,000,000" official French fr 21% http://en.wikipedia.org/wiki/French_language +Switzerland CH "8,292,809" 99% "523,100,000,000" official French fr 67% https://www.francophonie.org/sites/default/files/2021-04/LFDM-20Edition-2019-La-langue-fran%C3%A7aise-dans-le-monde.pdf Switzerland CH "8,292,809" 99% "523,100,000,000" official German de 73% Switzerland CH "8,292,809" 99% "523,100,000,000" official Italian it 4.3% Switzerland CH "8,292,809" 99% "523,100,000,000" Lombard lmo 4.1% 5% 5% writing pop estimated in absence of other data @@ -1348,7 +1348,7 @@ Trinidad & Tobago TT "1,215,527" 99% "42,850,000,000" official English en 88% Trinidad & Tobago TT "1,215,527" 99% "42,850,000,000" Spanish es "4,100" Tristan da Cunha TA 275 99% "12,350,000" English en 272 Tunisia TN "11,516,189" 79% "137,700,000,000" official Arabic ar 90% -Tunisia TN "11,516,189" 79% "137,700,000,000" official French fr 74% http://en.wikipedia.org/wiki/Tunisia#Language - using pop * literacy rate +Tunisia TN "11,516,189" 79% "137,700,000,000" official French fr 52% https://www.francophonie.org/sites/default/files/2021-04/LFDM-20Edition-2019-La-langue-fran%C3%A7aise-dans-le-monde.pdf Tunisia TN "11,516,189" 79% "137,700,000,000" Tunisian Arabic aeb 90% Turkey TR "81,257,239" 94% "2,186,000,000,000" Abkhazian ab "4,000" http://www.ethnologue.com/show_language.asp?code=abk 96% bilingual in Turkish. Turkey TR "81,257,239" 94% "2,186,000,000,000" Adyghe ady "316,000"