From 415bc57c3abcf91980c1cf36e1e14835bb4a1f36 Mon Sep 17 00:00:00 2001 From: Conrad Nied Date: Thu, 21 Nov 2024 13:24:30 -0800 Subject: [PATCH] CLDR-18108 Don't demote historical scripts Updated the ConvertLanguageData script to avoid demoting historical scripts/historical languages. Also removed multi-primary script notes from the description -- anticipating a re-design, handled by other tasks. --- common/supplemental/supplementalData.xml | 126 +++++++++--------- .../language-script-description.md | 11 +- .../cldr/tool/ConvertLanguageData.java | 31 +---- 3 files changed, 73 insertions(+), 95 deletions(-) diff --git a/common/supplemental/supplementalData.xml b/common/supplemental/supplementalData.xml index c2aea32de05..0ea3a469946 100644 --- a/common/supplemental/supplementalData.xml +++ b/common/supplemental/supplementalData.xml @@ -1291,7 +1291,7 @@ XXX Code for transations where no currency is involved - + @@ -1302,7 +1302,7 @@ XXX Code for transations where no currency is involved - + @@ -1311,7 +1311,7 @@ XXX Code for transations where no currency is involved - + @@ -1329,7 +1329,7 @@ XXX Code for transations where no currency is involved - + @@ -1342,7 +1342,7 @@ XXX Code for transations where no currency is involved - + @@ -1449,7 +1449,7 @@ XXX Code for transations where no currency is involved - + @@ -1460,7 +1460,7 @@ XXX Code for transations where no currency is involved - + @@ -1493,10 +1493,11 @@ XXX Code for transations where no currency is involved - + + - + @@ -1528,7 +1529,7 @@ XXX Code for transations where no currency is involved - + @@ -1536,19 +1537,19 @@ XXX Code for transations where no currency is involved - + - + - + @@ -1582,8 +1583,8 @@ XXX Code for transations where no currency is involved - - + + @@ -1616,7 +1617,7 @@ XXX Code for transations where no currency is involved - + @@ -1625,18 +1626,18 @@ XXX Code for transations where no currency is involved - - + + - + - + - + @@ -1667,7 +1668,7 @@ XXX Code for transations where no currency 
is involved - + @@ -1699,13 +1700,13 @@ XXX Code for transations where no currency is involved - + - + @@ -1717,7 +1718,7 @@ XXX Code for transations where no currency is involved - + @@ -1727,13 +1728,13 @@ XXX Code for transations where no currency is involved - + - + @@ -1745,7 +1746,8 @@ XXX Code for transations where no currency is involved - + + @@ -1840,8 +1842,9 @@ XXX Code for transations where no currency is involved - - + + + @@ -1854,7 +1857,7 @@ XXX Code for transations where no currency is involved - + @@ -1866,7 +1869,7 @@ XXX Code for transations where no currency is involved - + @@ -1891,19 +1894,19 @@ XXX Code for transations where no currency is involved - + - + - + @@ -1955,7 +1958,7 @@ XXX Code for transations where no currency is involved - + @@ -1991,7 +1994,7 @@ XXX Code for transations where no currency is involved - + @@ -2033,8 +2036,8 @@ XXX Code for transations where no currency is involved - - + + @@ -2069,7 +2072,7 @@ XXX Code for transations where no currency is involved - + @@ -2085,10 +2088,11 @@ XXX Code for transations where no currency is involved - + - - + + + @@ -2102,8 +2106,8 @@ XXX Code for transations where no currency is involved - - + + @@ -2191,7 +2195,7 @@ XXX Code for transations where no currency is involved - + @@ -2216,7 +2220,7 @@ XXX Code for transations where no currency is involved - + @@ -2262,7 +2266,7 @@ XXX Code for transations where no currency is involved - + @@ -2314,7 +2318,7 @@ XXX Code for transations where no currency is involved - + @@ -2349,7 +2353,7 @@ XXX Code for transations where no currency is involved - + @@ -2378,8 +2382,8 @@ XXX Code for transations where no currency is involved - - + + @@ -2405,21 +2409,21 @@ XXX Code for transations where no currency is involved - + - - + + - - - + + + - - + + @@ -2439,7 +2443,7 @@ XXX Code for transations where no currency is involved - + diff --git a/docs/site/development/updating-codes/update-language-script-info/language-script-description.md 
b/docs/site/development/updating-codes/update-language-script-info/language-script-description.md index c757b2949af..0987deda704 100644 --- a/docs/site/development/updating-codes/update-language-script-info/language-script-description.md +++ b/docs/site/development/updating-codes/update-language-script-info/language-script-description.md @@ -6,14 +6,11 @@ title: Language Script Description The [`language\_script.tsv`](https://github.com/unicode-org/cldr/blob/main/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/language_script.tsv) data file should list all of the language / script combinations that are in common use. Usage by country is indicated in the [`country\_language\_population.tsv`](https://github.com/unicode-org/cldr/blob/main/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/country_language_population.tsv) spreadsheet. -1. Every language needs at least 1 script considered the **primary** script. +1. Every language should have 1 script considered the **primary** script. 1. This data is used to determine [the most Likely language and region](likelysubtags-and-default-content) so there needs to be at least 1 primary value. - 2. [Changed in v47] Include a primary script for historical languages (eg. Ancient Greek, Coptic). The primary script should reflect where the majority of the written corpus originates from. -2. Languages written by significant populations with different scritps in different countries can have multiple **primary** scripts. The [likely subtags](https://www.unicode.org/cldr/charts/latest/supplemental/likely_subtags.html) patterns will use population counts to disambiguate the default script for each locale. -3. Other scripts used for a language should be marked **secondary**. + 2. __Changed in v47__ Include a primary script for historical languages (e.g. Ancient Greek, Coptic). The primary script should reflect where the majority of the written corpus originates from. +2. 
Other scripts used for a language should be marked **secondary**. -If a language has multiple primary scripts, then it should not appear without the script tag in the country\_language\_population.tsv. For example, we should not see "az", but rather "az\_Cyrl", "az\_Latn", and so on. For each country where the language is used, we should see figures on the script\-specific values. The values may overlap, that is, we may see az\_Cyrl at 60% and az\_Latn at 55%. However, the combination with the predominantly used script **must** have a larger figure than the others. - -This is also reflected in CLDR main: languages with multiple scripts will have that reflected in their structure (eg sr\-Cyrl\-RS), with aliases for the language\-region combinations. +Languages with multiple ambiguous scripts should have that reflected in their CLDR structure (e.g. `sr_Cyrl_RS`), with aliases for the language\-region combinations. In order to re-generate the XML data use ConvertLanguageData as written about in [the article about updating the language scripts](.../update-language-script-info.md). 
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ConvertLanguageData.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ConvertLanguageData.java index 08e61d4f566..b7c322cce83 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ConvertLanguageData.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ConvertLanguageData.java @@ -2026,7 +2026,8 @@ static void getLanguage2Scripts(Set sortedInput) throws IOException { if (!checkCode(LstrType.language, language, row)) continue; for (String script : scripts.split("\\s+")) { if (!checkCode(LstrType.script, script, row)) continue; - // if the script is not modern, demote + + // Make sure the script has information Info scriptInfo = ScriptMetadata.getInfo(script); if (scriptInfo == null) { BadItem.ERROR.toString( @@ -2035,24 +2036,8 @@ static void getLanguage2Scripts(Set sortedInput) throws IOException { row); continue; } - IdUsage idUsage = scriptInfo.idUsage; - if (status == BasicLanguageData.Type.primary - && idUsage != IdUsage.RECOMMENDED) { - if (idUsage == IdUsage.ASPIRATIONAL || idUsage == IdUsage.LIMITED_USE) { - BadItem.WARNING.toString( - "Script has unexpected usage; make secondary if a Recommended script is used widely for the langauge", - idUsage + ", " + script + "=" + getULocaleScriptName(script), - row); - } else { - BadItem.ERROR.toString( - "Script is not modern; make secondary", - idUsage + ", " + script + "=" + getULocaleScriptName(script), - row); - status = BasicLanguageData.Type.secondary; - } - } - // if the language is not modern, demote + // Make sure the language code is valid if (LOCALE_ALIAS_INFO.get("language").containsKey(language)) { BadItem.ERROR.toString( "Remove/Change deprecated language", @@ -2064,15 +2049,7 @@ static void getLanguage2Scripts(Set sortedInput) throws IOException { row); continue; } - if (status == BasicLanguageData.Type.primary - && !sc.isModernLanguage(language)) { - BadItem.ERROR.toString( - "Should be secondary, language 
is not modern", - language + " " + getLanguageName(language), - row); - status = BasicLanguageData.Type.secondary; - } - + addLanguage2Script(language, status, script); if (row.size() > 5) { String reference = row.get(5);