Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLDR-17535 Update likely subtags data #3966

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion common/properties/scriptMetadata.txt
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ Onao; 33; 1E5D0; IN; 1; EXCLUSION; NO; NO; MIN; NO; NO # provisional data for f
Orkh; 33; 10C00; MN; 1; EXCLUSION; YES; NO; NO; NO; NO
Osge; 33; 104B5; US; 1; LIMITED_USE; NO; NO; NO; NO; YES
Osma; 33; 10480; SO; 1; EXCLUSION; NO; NO; NO; NO; NO
Ougr; 33; 10F7C; 143; 1; EXCLUSION; YES; NO; YES; NO; NO
Ougr; 33; 10F7C; CN; 1; EXCLUSION; YES; NO; YES; NO; NO
Palm; 33; 10873; SY; 1; EXCLUSION; YES; NO; NO; NO; NO
Pauc; 33; 11AC0; MM; 1; EXCLUSION; NO; NO; NO; NO; NO
Perm; 33; 1036B; RU; 1; EXCLUSION; NO; NO; NO; NO; NO
Expand Down
17,226 changes: 7,747 additions & 9,479 deletions common/supplemental/likelySubtags.xml

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion common/supplemental/supplementalData.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4226,7 +4226,7 @@ XXX Code for transactions where no currency is involved
<languagePopulation type="ku" populationPercent="5.5"/> <!--Kurdish-->
<languagePopulation type="apc" populationPercent="5.2" references="R1173"/> <!--Levantine Arabic-->
<languagePopulation type="zza" populationPercent="1.4"/> <!--Zaza-->
<languagePopulation type="kaa" populationPercent="1" references="R1199"/> <!--Kara-Kalpak-->
<languagePopulation type="kaa" populationPercent="0.1" references="R1199"/> <!--Kara-Kalpak-->
<languagePopulation type="kbd" populationPercent="0.77"/> <!--Kabardian-->
<languagePopulation type="az" populationPercent="0.74"/> <!--Azerbaijani-->
<languagePopulation type="az_Arab" populationPercent="0.65"/> <!--Azerbaijani (Arabic)-->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
import org.unicode.cldr.util.Containment;
import org.unicode.cldr.util.SemiFileReader;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.Validity;
import org.unicode.cldr.util.Validity.Status;
import org.unicode.cldr.util.With;

public class ScriptMetadata {
Expand Down Expand Up @@ -139,6 +142,12 @@ public static void addNameToCode(String type, Map<String, String> hashMap) {

/**
* Exception type that adds no members of its own to {@link ICUException}.
* NOTE(review): the name suggests it signals data tied to a newer Unicode version that callers
* may choose to skip — confirm against the throw sites.
*/
public static final class SkipNewUnicodeException extends ICUException {}

/**
* Scripts that either have no known languages as yet (Cpmn) or are used for any language
* (Brai). The likely-language validity check performed while constructing an {@code Info}
* does not report an error for these scripts.
*/
public static final Set<String> SCRIPTS_WITH_NO_LANGUAGES = Set.of("Brai", "Cpmn");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"any language" would be good script metadata (and supplemental data). Zyyy, Zxxx in the same boat.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, would be: Zyyy, Zxxx are disallowed in likely BTW. Otherwise ok?


public static class Info implements Comparable<Info> {
public final int rank;
public final VersionInfo age;
Expand Down Expand Up @@ -173,6 +182,7 @@ private Info(String[] items) {
ime = trinaryLookup.forString(Column.IME.getItem(items));
hasCase = trinaryLookup.forString(Column.HAS_CASE.getItem(items));
density = Column.DENSITY.getInt(items, -1);
String script = items[2];

final String countryRaw = Column.ORIGIN_COUNTRY.getItem(items);
String country = CountryCodeConverter.getCodeFromName(countryRaw, false);
Expand All @@ -191,6 +201,39 @@ private Info(String[] items) {
langCode = null;
}
likelyLanguage = langCode == null ? "und" : langCode;

// check for bad countries, bad languages

final Status scriptStatus =
Validity.getInstance().getCodeToStatus(LstrType.script).get(script);
if (!(scriptStatus == Status.special || scriptStatus == Status.unknown)) {
final Status countryStatus =
Validity.getInstance().getCodeToStatus(LstrType.region).get(originCountry);
if (countryStatus != Status.regular) {
errors.add(
"ScriptMetadata.java: the country ("
+ originCountry
+ ") for "
+ script
+ " is not valid: "
+ countryStatus);
}
final Status languageStatus =
Validity.getInstance()
.getCodeToStatus(LstrType.language)
.get(likelyLanguage);
if (languageStatus != Status.regular
// make an exception for scripts that have no known languages
&& !SCRIPTS_WITH_NO_LANGUAGES.contains(script)) {
errors.add(
"ScriptMetadata.java: the likely language ("
+ likelyLanguage
+ ") for "
+ script
+ " is not valid: "
+ languageStatus);
}
}
}

public Info(Info other, String string, String sampleCharacter) {
Expand Down
Loading
Loading