diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCheckCLDR.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCheckCLDR.java index 86ccdd3eccc..13ac7d2f6d1 100644 --- a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCheckCLDR.java +++ b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCheckCLDR.java @@ -1089,7 +1089,7 @@ public void TestShowRowAction() { for (String locale : localesForRowAction) { DummyPathValueInfo dummyPathValueInfo = new DummyPathValueInfo(); - dummyPathValueInfo.locale = CLDRLocale.getInstance(locale); + dummyPathValueInfo.setLocale(CLDRLocale.getInstance(locale)); CLDRFile cldrFile = testInfo.getCldrFactory().make(locale, true); CLDRFile cldrFileUnresolved = testInfo.getCldrFactory().make(locale, false); @@ -1107,8 +1107,9 @@ public void TestShowRowAction() { for (PathHeader ph : sorted) { String path = ph.getOriginalPath(); SurveyToolStatus surveyToolStatus = ph.getSurveyToolStatus(); - dummyPathValueInfo.xpath = path; - dummyPathValueInfo.baselineValue = cldrFileUnresolved.getStringValue(path); + dummyPathValueInfo.setXpath(path); + dummyPathValueInfo.setBaselineValue( + cldrFileUnresolved.getStringValue(path)); StatusAction action = phase.getShowRowAction( dummyPathValueInfo, InputMethod.DIRECT, ph, dummyUserInfo); @@ -1126,7 +1127,7 @@ public void TestShowRowAction() { "vo ==> FORBID_READONLY", StatusAction.FORBID_READONLY, action); - } else if (dummyPathValueInfo.baselineValue == null) { + } else if (dummyPathValueInfo.getBaselineValue() == null) { if (!assertEquals( "missing ==> ALLOW", StatusAction.ALLOW, action)) { warnln("\t\t" + locale + "\t" + ph); @@ -1150,7 +1151,9 @@ public void TestShowRowAction() { } actionToExamplePath.put( key, - Pair.of(dummyPathValueInfo.baselineValue != null, path)); + Pair.of( + dummyPathValueInfo.getBaselineValue() != null, + path)); } } } @@ -1210,7 +1213,7 @@ public VoterInfo getVoterInfo() { } }; - private static class DummyPathValueInfo implements PathValueInfo { + public static class DummyPathValueInfo implements PathValueInfo { private CLDRLocale locale; private String xpath; private String baselineValue; @@ -1271,6 +1274,18 @@ public CLDRLocale getLocale() { public String getXpath() { return xpath; } + + public void setLocale(CLDRLocale locale) { + this.locale = locale; + } + + public void setXpath(String xpath) { + this.xpath = xpath; + } + + public void setBaselineValue(String baselineValue) { + this.baselineValue = baselineValue; + } } final Set cldrLocales = diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestExampleGenerator.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestExampleGenerator.java index da1d251ee87..e9ed5968ff1 100644 --- a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestExampleGenerator.java +++ b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestExampleGenerator.java @@ -1,6 +1,13 @@ package org.unicode.cldr.unittest; +import com.google.common.base.Joiner; +import com.google.common.base.Objects; +import com.google.common.base.Splitter; +import com.google.common.collect.ComparisonChain; import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterables; +import com.google.common.collect.Multimap; +import com.google.common.collect.TreeMultimap; import com.ibm.icu.dev.test.TestFmwk; import java.io.IOException; import java.util.ArrayList; @@ -13,30 +20,59 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Set; +import java.util.TreeMap; import java.util.TreeSet; import java.util.stream.Collectors; +import org.unicode.cldr.test.CheckCLDR.InputMethod; +import org.unicode.cldr.test.CheckCLDR.Phase; +import org.unicode.cldr.test.CheckCLDR.StatusAction; import org.unicode.cldr.test.ExampleGenerator; import org.unicode.cldr.test.ExampleGenerator.UnitLength; +import org.unicode.cldr.unittest.TestCheckCLDR.DummyPathValueInfo; import org.unicode.cldr.util.CLDRConfig; import org.unicode.cldr.util.CLDRFile; +import org.unicode.cldr.util.CLDRInfo.UserInfo; +import org.unicode.cldr.util.CLDRLocale; import org.unicode.cldr.util.CldrUtility; +import org.unicode.cldr.util.Counter; +import org.unicode.cldr.util.DtdData; +import org.unicode.cldr.util.DtdType; import org.unicode.cldr.util.Factory; import org.unicode.cldr.util.GrammarInfo; import org.unicode.cldr.util.GrammarInfo.CaseValues; import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature; import org.unicode.cldr.util.GrammarInfo.GrammaticalScope; import org.unicode.cldr.util.GrammarInfo.GrammaticalTarget; +import org.unicode.cldr.util.Level; +import org.unicode.cldr.util.Organization; import org.unicode.cldr.util.Pair; +import org.unicode.cldr.util.PathHeader; +import org.unicode.cldr.util.PathHeader.BaseUrl; +import org.unicode.cldr.util.PathHeader.PageId; +import org.unicode.cldr.util.PathHeader.SectionId; import org.unicode.cldr.util.PathStarrer; import org.unicode.cldr.util.SupplementalDataInfo; import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; import org.unicode.cldr.util.SupplementalDataInfo.PluralType; import org.unicode.cldr.util.UnitPathType; +import org.unicode.cldr.util.VoteResolver.VoterInfo; import org.unicode.cldr.util.With; +import org.unicode.cldr.util.XPathParts; public class TestExampleGenerator extends TestFmwk { + private static final String SKIP = "SKIP"; + + private static final Joiner CR_TAB2_JOINER = Joiner.on("\n\t\t"); + + private static final boolean CHECK_ROW_ACTION = false; + + private static final Splitter SLASH2_SPLITTER = + Splitter.on("//").omitEmptyStrings().trimResults(); + + private static final Joiner TAB_JOINER = Joiner.on('\t'); + boolean showTranslationPaths = CldrUtility.getProperty("TestExampleGenerator:showTranslationPaths", false); @@ -253,6 +289,7 @@ public void testCurrency() { "//ldml/dates/timeZoneNames/metazone[@type=\"([^\"]*+)\"]/short/daylight", "//ldml/personNames/personName[@order=\"([^\"]*+)\"][@length=\"([^\"]*+)\"][@formality=\"([^\"]*+)\"]/namePattern", "//ldml/personNames/personName[@order=\"([^\"]*+)\"][@length=\"([^\"]*+)\"][@usage=\"([^\"]*+)\"][@formality=\"([^\"]*+)\"]/namePattern"); // CLDR-15384 + // Add to above if the background SHOULD appear, but we don't have them yet. TODO Add later public void TestAllPaths() { @@ -1645,4 +1682,398 @@ public void TestUnicodeSetExamples() { assertEquals(locale + path + "=" + value, expected, actual); } } + + static final class MissingKey implements Comparable { + final SectionId sectionId; + final PageId pageId; + final String starred; + + public MissingKey(SectionId sectionId, PageId pageId, String starred) { + this.sectionId = sectionId; + this.pageId = pageId; + this.starred = starred; + } + + @Override + public int compareTo(MissingKey o) { + return ComparisonChain.start() + .compare(sectionId, o.sectionId) + .compare(pageId, o.pageId) + .compare(starred, o.starred) + .result(); + } + + @Override + public boolean equals(Object obj) { + return compareTo((MissingKey) obj) == 0; + } + + @Override + public int hashCode() { + return Objects.hashCode(sectionId, pageId, starred); + } + } + + public void TestForMissing() { + Factory factory = info.getCldrFactory(); // don't worry about examples for annotations + DtdData dtdData = DtdData.getInstance(DtdType.ldml); + PathHeader.Factory phf = PathHeader.getFactory(); + Set seenPaths = + new HashSet<>(); // assume whether there is an example is independent of locale, to + // speed up the test. + final String separator = "•"; + PathStarrer ps = new PathStarrer(); + ps.setSubstitutionPattern("*"); + + // Setup for calling phase.getShowRowAction + DummyPathValueInfo dummyPathValueInfo = null; + VoterInfo dummyVoterInfo = null; + UserInfo dummyUserInfo = null; + + // disabled, since it doesn't eliminate anything. However, left under a flag just in case it + // is useful later + if (CHECK_ROW_ACTION) { + dummyPathValueInfo = new DummyPathValueInfo(); + dummyVoterInfo = + new VoterInfo( + Organization.cldr, + org.unicode.cldr.util.VoteResolver.Level.vetter, + "somename"); + dummyUserInfo = + new UserInfo() { + @Override + public VoterInfo getVoterInfo() { + return dummyVoterInfo; + } + }; + } + + // Use representative locales: + // 'en' for the most coverage, + // 'de' and 'cs' for more complex inflections, + // 'ja' for CJK issues + + for (String localeId : List.of("de", "en", "cs", "ja")) { + CLDRFile cldrFile = factory.make(localeId, true); + CLDRFile cldrFileUnresolved = cldrFile.getUnresolved(); + + ExampleGenerator exampleGenerator = new ExampleGenerator(cldrFile, info.getEnglish()); + if (CHECK_ROW_ACTION) { + dummyPathValueInfo.setLocale(CLDRLocale.getInstance(localeId)); + } + // for collecting data + + Counter countWithExamples = new Counter<>(); + Map samplesForWith = new HashMap<>(); + Counter countWithoutExamples = new Counter<>(); + Multimap samplesForWithout = TreeMultimap.create(); + Map sampleUrlForWithout = new TreeMap<>(); + TreeMultimap skipped = TreeMultimap.create(); + + // for each path in the file, check that there is an example + // or we know why not + + for (String xpath : cldrFile.fullIterable()) { + if (seenPaths.contains(xpath)) { + continue; + } + seenPaths.add(xpath); + if (xpath.endsWith("/alias")) { + continue; + } + String value = cldrFile.getStringValue(xpath); + if (value == null) { + continue; + } + + final XPathParts parts = XPathParts.getFrozenInstance(xpath); + if (dtdData.isDeprecated(parts)) { + continue; + } + Level level = SDI.getCoverageLevel(xpath, "en"); + if (level.compareTo(Level.COMPREHENSIVE) == 0) { + continue; + } + String starred = ps.set(xpath); + String attrs = ps.getAttributesString(separator); + + PathHeader ph = phf.fromPath(xpath); + if (CHECK_ROW_ACTION) { + dummyPathValueInfo.setXpath(xpath); + dummyPathValueInfo.setBaselineValue(cldrFileUnresolved.getStringValue(xpath)); + StatusAction action = + Phase.SUBMISSION.getShowRowAction( + dummyPathValueInfo, InputMethod.DIRECT, ph, dummyUserInfo); + if (action.isForbidden()) { + System.out.println(xpath + " is forbidden"); + continue; + } + } + + MissingKey key = new MissingKey(ph.getSectionId(), ph.getPageId(), starred); + String example = null; + try { + example = exampleGenerator.getExampleHtml(xpath, value); + } catch (Exception e) { + } + if (example == null) { + String missingResult = getResult(starred, attrs); + if (missingResult != null) { + skipped.put(missingResult, starred); + continue; + } + + samplesForWithout.put(key, sampleAttrAndValue(ps, separator, value)); + sampleUrlForWithout.put(key, ph.getUrl(BaseUrl.PRODUCTION, localeId)); + countWithoutExamples.add(key, 1); + } else { + if (!samplesForWith.containsKey(key)) { + samplesForWith.put(key, sampleAttrAndValue(ps, separator, value)); + } + countWithExamples.add(key, 1); + } + } + Set keys = new TreeSet<>(); + keys.addAll(countWithoutExamples.keySet()); + keys.addAll(countWithExamples.keySet()); + List missingItems = new ArrayList<>(); + + // we use the missing keys, which sort by section, page, path + + for (MissingKey key : keys) { + final long countWithout = countWithoutExamples.get(key); + if (countWithout == 0) { // ok, no missing + continue; + } + final Collection sampleForWithoutItem = samplesForWithout.get(key); + final String sampleForWithItem = samplesForWith.get(key); + final long countWith = countWithExamples.get(key); + final double doneRatio = countWith / (double) (countWith + countWithout); + missingItems.add( + TAB_JOINER.join( + doneRatio, + countWithout, + (sampleForWithItem == null + ? sampleForWithoutItem.iterator().next() + : Joiner.on("; ") + .join(Iterables.limit(sampleForWithoutItem, 5))), + sampleUrlForWithout.get(key), + countWith, + (sampleForWithItem == null ? "n/a" : sampleForWithItem), + key.sectionId, + key.pageId, + key.starred)); + } + + // show all the skipped items, and logKnownIssue items + + for (Entry> entry : skipped.asMap().entrySet()) { + final String ticketComment = entry.getKey(); + final String paths = CR_TAB2_JOINER.join(entry.getValue()); + if (ticketComment.equals(SKIP)) { + logln(ticketComment + ";\n\t\t" + paths); + } else { + int spacePos = ticketComment.indexOf(' '); + logKnownIssue( + ticketComment.substring(0, spacePos), + ticketComment.substring(spacePos + 1) + + ")\n\t\t(For the following paths:\n\t\t" + + paths); + } + } + + // Here is where missing examples will show up. + // If it is ok to skip them (only when there is no reasonable example), add to + // HANDLE_MISSING data + // Otherwise add an example + + if (!missingItems.isEmpty()) { + errln( + TAB_JOINER.join(localeId, "missing examples:", missingItems.size()) + + "\n" + + "\nDone?\tWithout\tSample Attrs\tURL\tWith\tSample Attrs\tSection\tPage\tStarred Pattern\n" + + Joiner.on("\n").join(missingItems)); + } + } + } + + /** + * This is a mechanism for TestMissing exceptions: a) skipping the items where there are no + * reasonable examples b) logging known issues where we know what to do, and have filed tickets + * + *

Then only new missing examples will trigger errors. + * + *

If new structure is added, an example should be added at the same time if possible, + * otherwise it should be added with "OK". + */ + static final Map> HANDLE_MISSING; + + static { + // The format is 3 items + // a) a list of paths (separated by space or just concatenated) + // b) a return value. OK to just skip, otherwise + // c) a list of 1 or more attributes (like "mul", "zxx") or a wildcard "*" + + String[][] data = { + // mul➔«Multiple languages»; zxx➔«No linguistic content» + {SKIP, "//ldml/localeDisplayNames/languages/language[@type=\"*\"]", "mul", "zxx"}, + { + SKIP, + "//ldml/characters/moreInformation" + + "//ldml/dates/fields/field[@type=\"*\"]/relative[@type=\"*\"]" + + "//ldml/dates/timeZoneNames/gmtZeroFormat" + + "//ldml/dates/timeZoneNames/metazone[@type=\"*\"]/short/standard" + + "//ldml/numbers/symbols[@numberSystem=\"*\"]/infinity" + + "//ldml/numbers/symbols[@numberSystem=\"*\"]/nan" + + "//ldml/dates/calendars/calendar[@type=\"*\"]/eras/eraAbbr/era[@type=\"*\"][@alt=\"*\"]" + + "//ldml/dates/calendars/calendar[@type=\"*\"]/eras/eraNames/era[@type=\"*\"][@alt=\"*\"]" + + "//ldml/typographicNames/styleName[@type=\"*\"][@subtype=\"*\"][@alt=\"*\"]", + "*" + }, + { + "CLDR-17756 Add examples of date intervals", + "//ldml/dates/calendars/calendar[@type=\"*\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"*\"]/greatestDifference[@id=\"*\"]", + "*" + }, + { + "CLDR-17756 Show \"{0} ¤¤\" with formatted number and ISO code, eg {0} ¤¤ becomes 3,5 EUR", + "//ldml/numbers/currencyFormats[@numberSystem=\"*\"]/currencyPatternAppendISO", + "*" + }, + { + "CLDR-17756 Show 2 currencies with pattern, eg EUR ➔ USD", + "//ldml/numbers/currencies/currency[@type=\"*\"]/displayName", + "*" + }, + { + "CLDR-17756 Show as part of a locale name", + "//ldml/localeDisplayNames/keys/key[@type=\"*\"]" + + "//ldml/localeDisplayNames/measurementSystemNames/measurementSystemName[@type=\"*\"]" + + "//ldml/localeDisplayNames/subdivisions/subdivision[@type=\"*\"]" + + "//ldml/localeDisplayNames/types/type[@key=\"*\"][@type=\"*\"]", + "*" + }, + { + "CLDR-17756 Show using two months, eg Januar - Juni", + "//ldml/dates/calendars/calendar[@type=\"*\"]/dateTimeFormats/intervalFormats/intervalFormatFallback", + "*" + }, + { + "CLDR-15078 Enable compound unit formatting", + "//ldml/units/unitLength[@type=\"*\"]/unit[@type=\"*\"]/unitPattern[@count=\"*\"]" + + "//ldml/units/unitLength[@type=\"*\"]/unit[@type=\"*\"]/unitPattern[@count=\"*\"][@case=\"*\"]", + "*" + }, + { + "CLDR-17756 Show font with field, eg: Helvetica (kursiv), Helvetica (Kursivstellung), Helvetica (vertikale Brüch)", + "//ldml/typographicNames/styleName[@type=\"*\"][@subtype=\"*\"]" + + "//ldml/typographicNames/axisName[@type=\"*\"]" + + "//ldml/typographicNames/featureName[@type=\"*\"]", + "*" + }, + { + "CLDR-17756 Show in date with both variants: formatting and standalone. That way people can see what difference it makes, eg between MMMM and LLLL", + "//ldml/dates/calendars/calendar[@type=\"*\"]/days/dayContext[@type=\"*\"]/dayWidth[@type=\"*\"]/day[@type=\"*\"]" + + "//ldml/dates/calendars/calendar[@type=\"*\"]/months/monthContext[@type=\"*\"]/monthWidth[@type=\"*\"]/month[@type=\"*\"]" + + "//ldml/dates/calendars/calendar[@type=\"*\"]/months/monthContext[@type=\"*\"]/monthWidth[@type=\"*\"]/month[@type=\"*\"][@yeartype=\"*\"]" + + "//ldml/dates/calendars/calendar[@type=\"*\"]/quarters/quarterContext[@type=\"*\"]/quarterWidth[@type=\"*\"]/quarter[@type=\"*\"]", + "*" + }, + { + "CLDR-17756 Show pattern with example", + "//ldml/dates/fields/field[@type=\"*\"]/relativePeriod", + "*" + }, + { + "CLDR-17756 Show sample name with 2 different values", + "//ldml/personNames/foreignSpaceReplacement" + + "//ldml/personNames/initialPattern[@type=\"*\"]" + + "//ldml/personNames/nativeSpaceReplacement" + + "//ldml/personNames/parameterDefault[@parameter=\"*\"]" + + "//ldml/personNames/sampleName[@item=\"*\"]/nameField[@type=\"*\"]", + "*" + }, + { + "CLDR-17756 Show two units with pattern, eg 'Meter ➔ Fuß'", + "//ldml/units/unitLength[@type=\"*\"]/unit[@type=\"*\"]/displayName", + "*" + }, + { + "CLDR-17756 Show with {0}: {0}, eg Monat: Januar", + "//ldml/dates/fields/field[@type=\"*\"]/displayName", + "*" + }, + { + "CLDR-5854 Show with appropriate amount, eg 'in 3 Jahren', and for all relatives > 1 day, add a time", + "//ldml/dates/fields/field[@type=\"*\"]/relativeTime[@type=\"*\"]/relativeTimePattern[@count=\"*\"]", + "*" + }, + { + "CLDR-17756 Show with formattted date, including era", + "//ldml/dates/calendars/calendar[@type=\"*\"]/eras/eraAbbr/era[@type=\"*\"]\n" + + "//ldml/dates/calendars/calendar[@type=\"*\"]/eras/eraNames/era[@type=\"*\"]", + "*" + }, + { + "CLDR-17756 Show with pattern, eg '30° Süd'", + "//ldml/units/unitLength[@type=\"*\"]/coordinateUnit/coordinateUnitPattern[@type=\"*\"]", + "*" + }, + { + "CLDR-17756 Show with pattern, eg Richtung: 30° Süd", + "//ldml/units/unitLength[@type=\"*\"]/coordinateUnit/displayName", + "*" + }, + { + "CLDR-17756 Show with sample characters (where possible, emoji)", + "//ldml/characterLabels/characterLabelPattern[@type=\"*\"][@count=\"*\"]\n" + + "//ldml/characterLabels/characterLabel[@type=\"*\"]\n" + + "//ldml/characterLabels/characterLabelPattern[@type=\"*\"]", + "*" + }, + { + "CLDR-17756 Use gender minimal pair patterns to show in context — look at the minimal pair examples, reversing the background", + "//ldml/units/unitLength[@type=\"*\"]/unit[@type=\"*\"]/gender", + "*" + } + }; + Map> _HANDLE_MISSING = new TreeMap<>(); + for (String[] row : data) { + if (row.length < 3) { + throw new IllegalArgumentException( + "Need 3+ values; see comments below HANDLE_MISSING"); + } + String result = row[0]; + String paths = row[1]; + for (String path : SLASH2_SPLITTER.split(paths)) { + path = "//" + path; + // note, the resulting attributeToResult may be empty + Map attributeToResult = _HANDLE_MISSING.get(path); + if (attributeToResult == null) { + _HANDLE_MISSING.put(path, attributeToResult = new TreeMap<>()); + } + for (int i = 2; i < row.length; ++i) { + String attribute = row[i]; + attributeToResult.put(attribute, result); + } + } + } + HANDLE_MISSING = CldrUtility.protectCollection(_HANDLE_MISSING); + } + + private String getResult(String starredPath, String attr) { + Map attributeToResult = HANDLE_MISSING.get(starredPath); + if (attributeToResult == null) { + return null; + } + String result = attributeToResult.get(attr); + if (result == null) { + result = attributeToResult.get("*"); // wildcard + } + return result; + } + + public String sampleAttrAndValue(PathStarrer ps, final String separator, String value) { + return ps.getAttributesString(separator) + "➔«" + value + "»"; + } }