From dd212f7de27654f2f5a6398ca77fbe84527eca34 Mon Sep 17 00:00:00 2001 From: Peter Edberg Date: Fri, 24 May 2024 10:40:25 -0700 Subject: [PATCH] CLDR-14865 improve suggestion for fixed date pattern in case of bad field --- .../org/unicode/cldr/test/CheckDates.java | 141 +++++++++++++++++- .../unicode/cldr/unittest/TestCheckCLDR.java | 40 +++++ 2 files changed, 178 insertions(+), 3 deletions(-) diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/test/CheckDates.java b/tools/cldr-code/src/main/java/org/unicode/cldr/test/CheckDates.java index d5a92b4e13b..4d57dd8baae 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/test/CheckDates.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/test/CheckDates.java @@ -859,6 +859,38 @@ private static long getDateTimeinMillis( static long date4004BC = getDateTimeinMillis(-4004, 9, 23, 2, 0, 0); static Random random = new Random(0); + // We extend VariableField to implement a proper equals() method so we can use + // List methods remove() and get(). + private class MyVariableField extends DateTimePatternGenerator.VariableField { + public MyVariableField(String string) { + super(string); + } + + @Override + public boolean equals(Object object) { + if (!(object instanceof DateTimePatternGenerator.VariableField)) { + return false; + } + return (this.toString().equals(object.toString())); + } + + @Override + public int hashCode() { + return this.toString().hashCode(); + } + } + + // In a List, replace DateTimePatternGenerator.VariableField items with MyVariableField + private List updateVariableFieldInList(List items) { + for (int itemIndex = 0; itemIndex < items.size(); itemIndex++) { + Object object = items.get(itemIndex); + if (object instanceof DateTimePatternGenerator.VariableField) { + items.set(itemIndex, new MyVariableField(object.toString())); + } + } + return items; + } + private void checkPattern( DateTimePatternType dateTypePatternType, String path, @@ -866,6 +898,8 @@ private void checkPattern( String value, List result) throws ParseException { + // Map to skeleton including mapping to canonical pattern chars e.g. LLL -> MMM + // (ICU internal, for CLDR?) String skeleton = dateTimePatternGenerator.getSkeletonAllowingDuplicates(value); String skeletonCanonical = dateTimePatternGenerator.getCanonicalSkeletonAllowingDuplicates(value); @@ -905,6 +939,8 @@ private void checkPattern( if (dateTypePatternType == DateTimePatternType.AVAILABLE || dateTypePatternType == DateTimePatternType.INTERVAL) { + // Map to skeleton including mapping to canonical pattern chars e.g. LLL -> MMM + // (ICU internal, for CLDR?) String idCanonical = dateTimePatternGenerator.getCanonicalSkeletonAllowingDuplicates(id); if (skeleton.isEmpty()) { @@ -922,9 +958,108 @@ private void checkPattern( + ".", id, value)); - } else if (!dateTimePatternGenerator.skeletonsAreSimilar( + } else if (!dateTimePatternGenerator.skeletonsAreSimilar( // ICU internal for CLDR idCanonical, skeletonCanonical)) { + // Adjust pattern to match skeleton, but only width and subtype within + // canonical categories e.g. MMM -> LLLL, H -> HH. Will not change across + // canonical categories e.g. m -> M String fixedValue = dateTimePatternGenerator.replaceFieldTypes(value, id); + // check to see if that was enough; if not, may need to do more work. + String fixedValueCanonical = + dateTimePatternGenerator.getCanonicalSkeletonAllowingDuplicates(fixedValue); + String valueFromId = null; + if (!dateTimePatternGenerator.skeletonsAreSimilar( + idCanonical, fixedValueCanonical)) { + // Need to do more work. Try two things to get a reasonable suggestion: + // - Getting the winning pattern (valueFromId) from availableFormats for id, + // if it is not the same as the bad value we already have. + // - Replace a pattern field in fixedValue twhose type does not match the + // corresponding field from id. + // + // Here is the first thing, getting the winning pattern (valueFromId) from + // availableFormats for id. + String availableFormatPath = + "//ldml/dates/calendars/calendar[@type=\"" + + calendar + + "\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"" + + id + + "\"]"; + valueFromId = + getCldrFileToCheck().getWinningValueWithBailey(availableFormatPath); + if (valueFromId != null + && (valueFromId.equals(value) || valueFromId.equals(fixedValue))) { + valueFromId = null; // not useful in this case + } + // + // Here is the second thing, replacing a pattern field that does not match. + // We compare FormatParser Lists for idCanonical and fixedValueCanonical + // and if a mismatch we update the FormatParser list for fixedValue and + // generate an updated string from the FormatParser. + DateTimePatternGenerator.FormatParser idCanonFormat = + new DateTimePatternGenerator.FormatParser(); + idCanonFormat.set(idCanonical); + List idCanonItems = updateVariableFieldInList(idCanonFormat.getItems()); + DateTimePatternGenerator.FormatParser fixedValueCanonFormat = + new DateTimePatternGenerator.FormatParser(); + fixedValueCanonFormat.set(fixedValueCanonical); + List fixedValueCanonItems = + updateVariableFieldInList(fixedValueCanonFormat.getItems()); + DateTimePatternGenerator.FormatParser fixedValueFormat = + new DateTimePatternGenerator.FormatParser(); + fixedValueFormat.set(fixedValue); + List fixedValueItems = + updateVariableFieldInList(fixedValueFormat.getItems()); + // For idCanonFormat and fixedValueCanonFormat we started with skeletons (no + // literal text), so the items we are comparing will all be MyVariableField. We + // iterate over idCanonItems stripping matching items from fixedValueCanonItems + // until we hopefully have one remaining item in each that do not match each + // other. Then in fixedValueItems we replace the mismatched item with the one + // from idCanonItems. + int itemIndex = idCanonItems.size(); + while (--itemIndex >= 0) { + Object idCanonItem = idCanonItems.get(itemIndex); + if (fixedValueCanonItems.remove(idCanonItem)) { + // we have a match, removed it from fixedValueCanonItems, now remove + // it from idCanonItems (ok since we are iterating backwards). + idCanonItems.remove(itemIndex); + } + } + // Hopefully this leaves us with one item in each list, the mismatch to fix. + if (idCanonItems.size() == 1 && fixedValueCanonItems.size() == 1) { + // In fixedValueItems, replace all occurrences of the single item in + // fixedValueCanonItems (bad value) with the item in idCanonItems. + // There might be more than one for e.g. intervalFormats. + Object fixedValueCanonItem = fixedValueCanonItems.get(0); // replace this + Object idCanonItem = idCanonItems.get(0); // with this + boolean didUpdate = false; + while ((itemIndex = fixedValueItems.indexOf(fixedValueCanonItem)) >= 0) { + fixedValueItems.set(itemIndex, idCanonItem); + didUpdate = true; + } + if (didUpdate) { + // Now get the updated fixedValue with this replacement + fixedValue = fixedValueFormat.toString(); + fixedValueCanonical = + dateTimePatternGenerator.getCanonicalSkeletonAllowingDuplicates( + fixedValue); + } + } + // If this replacement attempt did not work, we give up on fixedValue + if (!dateTimePatternGenerator.skeletonsAreSimilar( + idCanonical, fixedValueCanonical)) { + fixedValue = null; + } + } + // Now report problem and suggested fix + String suggestion = "(no suggestion)"; + if (fixedValue != null) { + suggestion = "(" + fixedValue + ")"; + if (valueFromId != null && !valueFromId.equals(fixedValue)) { + suggestion += " or (" + valueFromId + ")"; + } + } else if (valueFromId != null) { + suggestion = "(" + valueFromId + ")"; + } result.add( new CheckStatus() .setCause(this) @@ -934,13 +1069,13 @@ private void checkPattern( // ({2}). " + .setMessage( "Your pattern ({2}) doesn't correspond to what is asked for. Yours would be right for an ID ({1}) but not for the ID ({0}). " - + "Please change your pattern to match what was asked, such as ({3}), with the right punctuation and/or ordering for your language. See " + + "Please change your pattern to match what was asked, such as {3}, with the right punctuation and/or ordering for your language. See " + CLDRURLS.DATE_TIME_PATTERNS_URL + ".", id, skeletonCanonical, value, - fixedValue)); + suggestion)); } if (dateTypePatternType == DateTimePatternType.AVAILABLE) { // index y+w+ must correpond to pattern containing only Y+ and w+ diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCheckCLDR.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCheckCLDR.java index 7bf144c500b..86ccdd3eccc 100644 --- a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCheckCLDR.java +++ b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCheckCLDR.java @@ -1037,6 +1037,46 @@ public void TestCheckDates() { testFile.remove(path1); testFile.remove(path2); } + // Test for CLDR-14865 + testFile = new CLDRFile(new SimpleXMLSource("fi")); + testFactory.addFile(testFile); + String availableFormatTestPath = + "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"GyMd\"]"; + String availableFormatValue = "d.m.y G"; // erroneous, should have M not m + testFile.add(availableFormatTestPath, availableFormatValue); + CheckCLDR c = new CheckDates(testFactory); + c.setCldrFileToCheck(testFile, options, result); + result.clear(); + c.check( + availableFormatTestPath, + availableFormatTestPath, + availableFormatValue, + options, + result); + Subtype actualSubtype = Subtype.none; + String message = null; + for (CheckStatus status : result) { + actualSubtype = status.getSubtype(); + message = status.getMessage(); + break; + } + if (actualSubtype != Subtype.incorrectDatePattern + || message == null + || !message.contains("d.M.y G")) { + String errorMessage = + "fi generic availableFormat for id=GyMd with value " + + availableFormatValue + + ":"; + if (actualSubtype != Subtype.incorrectDatePattern) { + errorMessage += + " expected Subtype.incorrectDatePattern, got " + actualSubtype + " ;"; + } + if (message == null || !message.contains("d.M.y G")) { + errorMessage += " expected message should contain suggested d.M.y G, got message: "; + errorMessage += (message == null) ? "(null)" : message; + } + errln(errorMessage); + } } /** Should be some CLDR locales, plus a locale specially allowed in limited submission */