Skip to content

Commit

Permalink
CLDR-17407 check for anomalies and fix (#3522)
Browse files Browse the repository at this point in the history
* CLDR-17407 check for anomalies and fix

* CLDR-17407 Revert changes to beaufort, add hack to address deprecated timezone ids.
  • Loading branch information
macchiati authored Feb 27, 2024
1 parent ef012e8 commit a5825d3
Show file tree
Hide file tree
Showing 6 changed files with 159 additions and 15 deletions.
1 change: 0 additions & 1 deletion common/properties/coverageLevels.txt
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,6 @@ th ; modern ; Thai
ti ; basic ; Tigrinya
tk ; modern ; Turkmen
to ; basic ; Tongan
tok ; basic ; Toki Pona
tr ; modern ; Turkish
tt ; basic ; Tatar
ug ; basic ; Uyghur
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,10 @@ private Map<String, List<String>> getCodeData(CodeType type) {
return type_code_data.get(type);
}

/**
 * Returns the keys of the code map held in {@code type_code_data} for the given type.
 *
 * @param type the code type whose codes are requested
 * @return the key set of the map registered for {@code type}
 */
public Set<String> getCodes(CodeType type) {
    final Map<String, List<String>> codeToData = type_code_data.get(type);
    return codeToData.keySet();
}

/**
* Get at the language registry values, as a Map from label to value.
*
Expand Down Expand Up @@ -275,7 +279,7 @@ public Set<String> getGoodAvailableCodes(CodeType type) {
case script:
return sd.getCLDRScriptCodes();
case tzid:
break; // nothing special
return sd.getCLDRTimezoneCodes();
default:
for (Iterator<String> it = result.iterator(); it.hasNext(); ) {
String code = it.next();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@

import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSetMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.impl.IterableComparator;
import com.ibm.icu.impl.Relation;
Expand Down Expand Up @@ -74,6 +77,7 @@
import org.unicode.cldr.util.GrammarInfo.GrammaticalScope;
import org.unicode.cldr.util.GrammarInfo.GrammaticalTarget;
import org.unicode.cldr.util.Rational.RationalParser;
import org.unicode.cldr.util.StandardCodes.CodeType;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type;
import org.unicode.cldr.util.SupplementalDataInfo.NumberingSystemInfo.NumberingSystemType;
Expand Down Expand Up @@ -984,6 +988,10 @@ public enum RBNFGroup {
public Map<Row.R2<String, String>, String> bcp47Since = new TreeMap<>();
public Map<Row.R2<String, String>, String> bcp47Preferred = new TreeMap<>();
public Map<Row.R2<String, String>, String> bcp47Deprecated = new TreeMap<>();

Map<String, Map<String, Bcp47KeyInfo>> bcp47KeyToSubtypeToInfo = new TreeMap<>();
Map<String, Map<String, String>> bcp47KeyToAliasToSubtype = new TreeMap<>();

public Map<String, String> bcp47ValueType = new TreeMap<>();

public Map<String, Row.R2<String, String>> validityInfo = new LinkedHashMap<>();
Expand Down Expand Up @@ -1148,6 +1156,34 @@ private SupplementalDataInfo(File directory) {
this.validity = Validity.getInstance(directory.toString() + "/../validity/");
} // hide

/**
 * Value holder for the attributes attached to one BCP 47 key/subtype pair: its aliases,
 * description, the version it was introduced in, its preferred replacement, and whether it is
 * deprecated.
 */
public static class Bcp47KeyInfo {
    final String description;
    final VersionInfo since;
    final String preferred;
    final boolean deprecated;
    final Set<String> aliases;

    /**
     * @param aliases alias set for the subtype; stored as passed (not copied)
     * @param description description text; stored as passed
     * @param since version string, parsed with {@code VersionInfo.getInstance}; {@code null}
     *     is kept as {@code null}
     * @param preferred preferred replacement; stored as passed
     * @param deprecated raw attribute value; {@code null} and {@code "false"} mean not
     *     deprecated, any other value means deprecated
     */
    public Bcp47KeyInfo(
            Set<String> aliases,
            String description,
            String since,
            String preferred,
            String deprecated) {
        this.aliases = aliases;
        this.description = description;
        this.preferred = preferred;
        // Only an explicit value other than "false" marks the subtype as deprecated.
        this.deprecated = deprecated != null && !deprecated.equals("false");
        this.since = (since != null) ? VersionInfo.getInstance(since) : null;
    }

    @Override
    public String toString() {
        final String pattern =
                "{description=«%s» since=%s preferred=%s deprecated=%s aliases=%s}";
        return String.format(pattern, description, since, preferred, deprecated, aliases);
    }
}

private void makeStuffSafe() {
// now make stuff safe
allLanguages.addAll(languageToPopulation.keySet());
Expand Down Expand Up @@ -1227,19 +1263,54 @@ private void makeStuffSafe() {
}
typeToLocaleToDayPeriodInfo = CldrUtility.protectCollection(typeToLocaleToDayPeriodInfo);
languageMatch = CldrUtility.protectCollection(languageMatch);
bcp47Key2Subtypes.freeze();

bcp47Extension2Keys.freeze();
bcp47Aliases.freeze();
bcp47Key2Subtypes.freeze();
CldrUtility.protectCollection(bcp47ValueType);
if (bcp47Key2Subtypes.isEmpty()) {
throw new InternalError(
"No BCP47 key 2 subtype data was loaded from bcp47 dir "
+ getBcp47Directory().getAbsolutePath());
}

bcp47Aliases.freeze();
CldrUtility.protectCollection(bcp47Descriptions);
CldrUtility.protectCollection(bcp47Since);
CldrUtility.protectCollection(bcp47Preferred);
CldrUtility.protectCollection(bcp47Deprecated);
CldrUtility.protectCollection(bcp47ValueType);

// create clean structure

for (Entry<String, Set<String>> entry : bcp47Extension2Keys.keyValuesSet()) {
for (String key : entry.getValue()) {
Map<String, Bcp47KeyInfo> subtypeToInfo = bcp47KeyToSubtypeToInfo.get(key);
if (subtypeToInfo == null) {
bcp47KeyToSubtypeToInfo.put(key, subtypeToInfo = new TreeMap<>());
}
Map<String, String> aliasToRegular = bcp47KeyToAliasToSubtype.get(key);
if (aliasToRegular == null) {
bcp47KeyToAliasToSubtype.put(key, aliasToRegular = new TreeMap<>());
}
for (String subtype : bcp47Key2Subtypes.get(key)) {
final R2<String, String> pair = R2.of(key, subtype);
final Set<String> aliases = bcp47Aliases.get(pair);
final Bcp47KeyInfo info =
new Bcp47KeyInfo(
aliases,
bcp47Descriptions.get(pair),
bcp47Since.get(pair),
bcp47Preferred.get(pair),
bcp47Deprecated.get(pair));
subtypeToInfo.put(subtype, info);
final Map<String, String> aliasToRegularFinal = aliasToRegular;
if (aliases != null) {
aliases.forEach(x -> aliasToRegularFinal.put(x, subtype));
}
}
}
}
bcp47KeyToSubtypeToInfo = CldrUtility.protectCollection(bcp47KeyToSubtypeToInfo);
bcp47KeyToAliasToSubtype = CldrUtility.protectCollection(bcp47KeyToAliasToSubtype);

CoverageLevelInfo.fixEU(coverageLevels, this);
coverageLevels = Collections.unmodifiableSortedSet(coverageLevels);
Expand Down Expand Up @@ -5197,4 +5268,64 @@ public UnitPrefixInfo getUnitPrefixInfo(String prefix) {
/**
 * Returns the keys of {@code unitPrefixInfo}.
 *
 * @return the key set of {@code unitPrefixInfo}
 */
public Set<String> getUnitPrefixes() {
    final Set<String> prefixes = unitPrefixInfo.keySet();
    return prefixes;
}

/**
 * Memoized supplier of the available long timezone IDs with deprecated ones filtered out.
 *
 * <p>Filtering out deprecated items is more complicated than it seems. The deprecation is
 * recorded in timezones.xml, e.g.:
 *
 * <pre>{@code
 * <type name="cathu" description="Thunder Bay, Canada" deprecated="true" preferred="cator"/>
 * <type name="cator" description="Toronto, Canada" alias="America/Toronto America/Montreal
 *     Canada/Eastern America/Nipigon America/Thunder_Bay"/>
 * }</pre>
 *
 * <p>We need to find the short IDs that are deprecated, but there is a problem due to
 * https://unicode-org.atlassian.net/browse/CLDR-17412. Until that is fixed, a hard-coded list
 * of long IDs is subtracted instead.
 *
 * <p>IDs called out for this issue: America/Nipigon, America/Thunder_Bay, America/Rainy_River.
 */
Supplier<Set<String>> goodTimezones =
        Suppliers.memoize(
                new Supplier<Set<String>>() {

                    @Override
                    public Set<String> get() {
                        final Set<String> availableLongTz =
                                sc.getAvailableCodes(CodeType.tzid);

                        // Hack for now: flip to false to restore the data-driven filter once
                        // CLDR-17412 is fixed.
                        final boolean useHardcodedExclusions = true;
                        if (useHardcodedExclusions) {
                            final Set<String> excludedLongIds =
                                    Set.of(
                                            "America/Santa_Isabel",
                                            "Australia/Currie",
                                            "America/Yellowknife",
                                            "America/Rainy_River",
                                            "America/Thunder_Bay",
                                            "America/Nipigon",
                                            "America/Pangnirtung",
                                            "Europe/Uzhgorod",
                                            "Europe/Zaporozhye",
                                            "Pacific/Johnston");
                            // Copy so the result is an unmodifiable snapshot, not a live view.
                            return Set.copyOf(
                                    Sets.difference(availableLongTz, excludedLongIds));
                        }

                        // TODO restore when CLDR-17412 is fixed
                        final Map<String, String> aliasToRegular =
                                bcp47KeyToAliasToSubtype.get("tz");
                        final Map<String, Bcp47KeyInfo> subtypeToInfo =
                                bcp47KeyToSubtypeToInfo.get("tz");
                        return availableLongTz.stream()
                                .filter(
                                        longId -> {
                                            final String shortId = aliasToRegular.get(longId);
                                            final Bcp47KeyInfo info =
                                                    shortId == null
                                                            ? null
                                                            : subtypeToInfo.get(shortId);
                                            // An ID with no alias mapping or no info is kept
                                            // rather than causing a NullPointerException.
                                            // NOTE(review): confirm this is the desired
                                            // policy when re-enabling this branch.
                                            return info == null || !info.deprecated;
                                        })
                                .collect(Collectors.toUnmodifiableSet());
                    }
                });

/**
 * Returns the timezone codes produced by the memoized {@code goodTimezones} supplier.
 *
 * @return the set returned by {@code goodTimezones.get()}
 */
public Set<String> getCLDRTimezoneCodes() {
    final Set<String> timezoneCodes = goodTimezones.get();
    return timezoneCodes;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ Google ; nn ; Modern ; Nynorsk
Google ; no ; modern ; T2 Norwegian (Bokmål)
Google ; or ; modern ; T5 Odia
Google ; pa ; modern ; T4.1 Punjabi
Google ; pcm ; modern ; Nigerian Pidgin
Google ; pcm ; moderate ; Nigerian Pidgin
Google ; pl ; modern ; T1 Polish
Google ; ps ; modern ; T5 Pashto
Google ; pt ; modern ; T1 Brazilian Portuguese
Expand Down Expand Up @@ -283,7 +283,7 @@ Apple ; kn ; modern
Apple ; ko ; modern
Apple ; lt ; modern
Apple ; lv ; modern
Apple ; mi ; modern
Apple ; mi ; moderate
Apple ; mk ; modern
Apple ; ml ; modern
Apple ; mr ; modern
Expand Down Expand Up @@ -485,7 +485,7 @@ Cldr ; cv ; basic
Cldr ; en_AU ; modern
Cldr ; es_MX ; modern
Cldr ; fr_CA ; modern
Cldr ; mi ; modern
Cldr ; mi ; moderate
Cldr ; zh_Hant_HK ; modern

#Cldr other (from Google)
Expand Down Expand Up @@ -516,7 +516,7 @@ Cldr ; su ; basic ; Sundanese (script TBD)
Cldr ; ks_Deva ; basic ; Kashmiri (Devanagari)
Cldr ; sd_Deva ; basic ; Sindhi (Devanagari script)
# Cldr ; cad ; basic ; Caddo
Cldr ; pcm ; modern ; Nigerian Pidgin
Cldr ; pcm ; moderate ; Nigerian Pidgin

Cldr ; bgc ; basic ; Haryanvi
Cldr ; bho ; basic ; Bhojpuri
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -840,14 +840,12 @@ public void TestFallbackFormat() {

public void Test4897() {
ExampleGenerator exampleGenerator = getExampleGenerator("it");
final CLDRFile cldrFile = exampleGenerator.getCldrFile();
for (String xpath :
With.in(
exampleGenerator
.getCldrFile()
.iterator(
"//ldml/dates/timeZoneNames",
exampleGenerator.getCldrFile().getComparator()))) {
String value = exampleGenerator.getCldrFile().getStringValue(xpath);
cldrFile.iterator(
"//ldml/dates/timeZoneNames", cldrFile.getComparator()))) {
String value = cldrFile.getStringValue(xpath);
String actual = exampleGenerator.getExampleHtml(xpath, value);
if (actual == null) {
if (!xpath.contains("singleCountries") && !xpath.contains("gmtZeroFormat")) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
package org.unicode.cldr.util;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.util.Set;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.CsvSource;

Expand Down Expand Up @@ -30,4 +34,12 @@ void testTargetCoverageLevel(final String locale, final String level) {
"Expected getTargetCoverageLevel(%s)=%s but was %s",
locale, expectLevel, actualLevel));
}

/**
 * Verifies that {@code getCLDRTimezoneCodes()} contains Europe/Andorra and does not contain
 * America/Nipigon.
 */
@Test
void testTimezoneExclusions() {
    final Set<String> cldrTimezones =
            SupplementalDataInfo.getInstance().getCLDRTimezoneCodes();
    assertTrue(cldrTimezones.contains("Europe/Andorra"));
    assertFalse(cldrTimezones.contains("America/Nipigon"));
}
}

0 comments on commit a5825d3

Please sign in to comment.