Skip to content

Commit

Permalink
CLDR-17535 Update likely subtags data (#3966)
Browse files Browse the repository at this point in the history
  • Loading branch information
macchiati authored Aug 19, 2024
1 parent b4e6abf commit 1a914d1
Show file tree
Hide file tree
Showing 12 changed files with 8,329 additions and 9,698 deletions.
2 changes: 1 addition & 1 deletion common/properties/scriptMetadata.txt
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ Onao; 33; 1E5D0; IN; 1; EXCLUSION; NO; NO; MIN; NO; NO # provisional data for f
Orkh; 33; 10C00; MN; 1; EXCLUSION; YES; NO; NO; NO; NO
Osge; 33; 104B5; US; 1; LIMITED_USE; NO; NO; NO; NO; YES
Osma; 33; 10480; SO; 1; EXCLUSION; NO; NO; NO; NO; NO
Ougr; 33; 10F7C; 143; 1; EXCLUSION; YES; NO; YES; NO; NO
Ougr; 33; 10F7C; CN; 1; EXCLUSION; YES; NO; YES; NO; NO
Palm; 33; 10873; SY; 1; EXCLUSION; YES; NO; NO; NO; NO
Pauc; 33; 11AC0; MM; 1; EXCLUSION; NO; NO; NO; NO; NO
Perm; 33; 1036B; RU; 1; EXCLUSION; NO; NO; NO; NO; NO
Expand Down
17,226 changes: 7,747 additions & 9,479 deletions common/supplemental/likelySubtags.xml

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion common/supplemental/supplementalData.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4226,7 +4226,7 @@ XXX Code for transations where no currency is involved
<languagePopulation type="ku" populationPercent="5.5"/> <!--Kurdish-->
<languagePopulation type="apc" populationPercent="5.2" references="R1173"/> <!--Levantine Arabic-->
<languagePopulation type="zza" populationPercent="1.4"/> <!--Zaza-->
<languagePopulation type="kaa" populationPercent="1" references="R1199"/> <!--Kara-Kalpak-->
<languagePopulation type="kaa" populationPercent="0.1" references="R1199"/> <!--Kara-Kalpak-->
<languagePopulation type="kbd" populationPercent="0.77"/> <!--Kabardian-->
<languagePopulation type="az" populationPercent="0.74"/> <!--Azerbaijani-->
<languagePopulation type="az_Arab" populationPercent="0.65"/> <!--Azerbaijani (Arabic)-->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
import org.unicode.cldr.util.Containment;
import org.unicode.cldr.util.SemiFileReader;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.Validity;
import org.unicode.cldr.util.Validity.Status;
import org.unicode.cldr.util.With;

public class ScriptMetadata {
Expand Down Expand Up @@ -139,6 +142,12 @@ public static void addNameToCode(String type, Map<String, String> hashMap) {

/**
* Marker exception type: it declares no fields, constructors, or methods of its own and inherits
* everything from {@link ICUException}.
*
* <p>NOTE(review): judging by the name, this is presumably thrown to signal that an entry
* belonging to a newer Unicode version should be skipped; no throw or catch site is visible
* here, so confirm against the callers.
*/
public static final class SkipNewUnicodeException extends ICUException {}

/**
* Script codes for which a missing or non-regular likely language is expected: scripts that
* either have no known languages as yet (Cpmn) or are used for any language (Brai).
*
* <p>The {@code Info(String[])} constructor consults this set and does not record a
* likely-language error for these scripts. The set is created with {@link Set#of} and is
* therefore unmodifiable.
*/
public static final Set<String> SCRIPTS_WITH_NO_LANGUAGES = Set.of("Brai", "Cpmn");

public static class Info implements Comparable<Info> {
public final int rank;
public final VersionInfo age;
Expand Down Expand Up @@ -173,6 +182,7 @@ private Info(String[] items) {
ime = trinaryLookup.forString(Column.IME.getItem(items));
hasCase = trinaryLookup.forString(Column.HAS_CASE.getItem(items));
density = Column.DENSITY.getInt(items, -1);
String script = items[2];

final String countryRaw = Column.ORIGIN_COUNTRY.getItem(items);
String country = CountryCodeConverter.getCodeFromName(countryRaw, false);
Expand All @@ -191,6 +201,39 @@ private Info(String[] items) {
langCode = null;
}
likelyLanguage = langCode == null ? "und" : langCode;

// check for bad countries, bad languages

final Status scriptStatus =
Validity.getInstance().getCodeToStatus(LstrType.script).get(script);
if (!(scriptStatus == Status.special || scriptStatus == Status.unknown)) {
final Status countryStatus =
Validity.getInstance().getCodeToStatus(LstrType.region).get(originCountry);
if (countryStatus != Status.regular) {
errors.add(
"ScriptMetadata.java: the country ("
+ originCountry
+ ") for "
+ script
+ " is not valid: "
+ countryStatus);
}
final Status languageStatus =
Validity.getInstance()
.getCodeToStatus(LstrType.language)
.get(likelyLanguage);
if (languageStatus != Status.regular
// make an exception for scripts that have no known languages
&& !SCRIPTS_WITH_NO_LANGUAGES.contains(script)) {
errors.add(
"ScriptMetadata.java: the likely language ("
+ likelyLanguage
+ ") for "
+ script
+ " is not valid: "
+ languageStatus);
}
}
}

public Info(Info other, String string, String sampleCharacter) {
Expand Down
Loading

0 comments on commit 1a914d1

Please sign in to comment.