Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLDR-17844 Modify the date report #3920

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import com.ibm.icu.impl.UnicodeMap;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import java.util.Locale;

Expand All @@ -19,10 +20,13 @@ public enum CodePointEscaper {
LF(0xA, "line feed"),
CR(0xD, "carriage return"),
SP(0x20, "space", "ASCII space"),
NSP(0x2009, "narrow/thin space", "Also known as ‘thin space’"),
TSP(0x2009, "thin space", "Aka ‘narrow space’"),
NBSP(0xA0, "no-break space", "Same as space, but doesn’t line wrap."),

NNBSP(0x202F, "narrow/thin no-break space", "Same as narrow space, but doesn’t line wrap."),
NBTSP(
0x202F,
"no-break thin space",
"Same as thin space, but doesn’t line wrap. Aka 'narrow no-break space'"),

WNJ(
0x200B,
Expand Down Expand Up @@ -147,6 +151,11 @@ public int getCodePoint() {
return codePoint;
}

/**
 * Returns this enum constant's code point as a String.
 *
 * @return the value of {@code UTF16.valueOf} applied to this constant's code point
 */
public String getString() {
    final String asString = UTF16.valueOf(codePoint);
    return asString;
}

/** Returns the escaped form from the code point for this enum */
public String codePointToEscaped() {
return ESCAPE_START + rawCodePointToEscaped(codePoint) + ESCAPE_END;
Expand Down Expand Up @@ -196,6 +205,15 @@ public static String toEscaped(String unescaped, UnicodeSet toEscape) {
});
return result.toString();
}

/**
 * Returns the display form of a single code point: the escaped form when {@code toEscape}
 * contains it, otherwise the code point itself as a String.
 *
 * @param cp the code point to convert
 * @param toEscape the set of code points that must be shown in escaped form
 * @return {@code codePointToEscaped(cp)} if {@code cp} is in {@code toEscape}, otherwise
 *     {@code UTF16.valueOf(cp)}
 */
public static String getEscaped(int cp, UnicodeSet toEscape) {
    return toEscape.contains(cp) ? codePointToEscaped(cp) : UTF16.valueOf(cp);
}

/** Return unescaped string */
public static String toUnescaped(String escaped) {
if (escaped == null) {
Expand Down
144 changes: 129 additions & 15 deletions tools/cldr-code/src/main/java/org/unicode/cldr/util/DateTimeFormats.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package org.unicode.cldr.util;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.ibm.icu.impl.Row.R3;
import com.ibm.icu.text.Bidi;
import com.ibm.icu.text.DateFormat;
import com.ibm.icu.text.DateIntervalFormat;
import com.ibm.icu.text.DateIntervalInfo;
Expand All @@ -13,6 +15,7 @@
import com.ibm.icu.text.MessageFormat;
import com.ibm.icu.text.SimpleDateFormat;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSet.SpanCondition;
import com.ibm.icu.util.Calendar;
import com.ibm.icu.util.DateInterval;
import com.ibm.icu.util.ICUUncheckedIOException;
Expand Down Expand Up @@ -45,6 +48,15 @@
import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;

public class DateTimeFormats {
// Code points that are shown in escaped form in the examples: a copy of
// CodePointEscaper.FORCE_ESCAPE with the space characters SP, TSP, NBSP and NBTSP removed,
// then frozen.
private static final UnicodeSet TO_ESCAPE =
new UnicodeSet(CodePointEscaper.FORCE_ESCAPE)
.remove(CodePointEscaper.SP.getCodePoint())
.remove(CodePointEscaper.TSP.getCodePoint())
.remove(CodePointEscaper.NBSP.getCodePoint())
.remove(CodePointEscaper.NBTSP.getCodePoint())
.freeze();
// Placeholder text displayed where a pattern (or a component of a pattern) is missing.
private static final String MISSING_PART = "ⓜⓘⓢⓢⓘⓝⓖ";
// The CLDRConfig instance, fetched once via getInstance() and reused within this class.
private static final CLDRConfig CONFIG = CLDRConfig.getInstance();
private static final Date SAMPLE_DATE_DEFAULT_END = new Date(2099 - 1900, 0, 13, 14, 45, 59);
private static final String DIR = CLDRPaths.CHART_DIRECTORY + "/verify/dates/";
private static SupplementalDataInfo sdi = SupplementalDataInfo.getInstance();
Expand Down Expand Up @@ -76,9 +88,20 @@ enum MyOptions {
// constant sets should
// probably be moved to a common file of such things.
private static final UnicodeSet BIDI_MARKS = new UnicodeSet("[:Bidi_Control:]").freeze();
// HTML line break placed between two renderings of the same example.
private static final String exampleSep = "<br>";

// Inline CSS fragments: a light gray background (used for the dir='auto' rendering) and a
// bordered bluish background (used for the table of escaped characters).
private static final String ltrBackground = "background-color:#EEE;";
private static final String tableBackground = "background-color:#DDF; border: 1px solid blue;";

// Opening/closing tags for the <div> wrappers placed around an example: forced RTL,
// auto direction with the ltrBackground color, and auto direction without a color.
private static final String rtlStart = "<div dir='rtl'>";
private static final String rtlEnd = "</div>";
private static final String autoLtrStart = "<div dir='auto' style='" + ltrBackground + "'>";
private static final String autoStart = "<div dir='auto'>";
private static final String divEnd = "</div>";
// style attribute inserted into the <table> tag that lists an example one character per cell.
private static final String tableStyle =
"style='border-collapse: collapse;" + tableBackground + " margin: auto'"; //

// <span> wrappers that apply the same two backgrounds to running text, so that the
// explanatory paragraph can show what each background looks like.
private static final String ltrSpan = "<span style='" + ltrBackground + "'>";
private static final String tableSpan = "<span style='" + tableBackground + "'>";
private static final String spanEnd = "</span>";

private static final String[] STOCK = {"short", "medium", "long", "full"};
private static final String[] CALENDAR_FIELD_TO_PATTERN_LETTER = {
Expand Down Expand Up @@ -125,16 +148,15 @@ enum MyOptions {
private ULocale locale;
private ICUServiceBuilder icuServiceBuilder;
private ICUServiceBuilder icuServiceBuilderEnglish =
new ICUServiceBuilder().setCldrFile(CLDRConfig.getInstance().getEnglish());
new ICUServiceBuilder().setCldrFile(CONFIG.getEnglish());

private DateIntervalInfo dateIntervalInfo = new DateIntervalInfo();
private String calendarID;
private CLDRFile file;
private boolean isRTL;

private static String surveyUrl =
CLDRConfig.getInstance()
.getProperty("CLDR_SURVEY_URL", "http://st.unicode.org/cldr-apps/survey");
CONFIG.getProperty("CLDR_SURVEY_URL", "http://st.unicode.org/cldr-apps/survey");

/**
* Set a CLDRFile and calendar. Must be done before calling addTable.
Expand Down Expand Up @@ -493,7 +515,33 @@ public boolean isPresent(String skeleton) {
public void addTable(DateTimeFormats comparison, Appendable output) {
try {
output.append(
"<h2>" + hackDoubleLinked("Patterns") + "</h2>\n<table class='dtf-table'>");
"<h2>"
+ hackDoubleLinked("Patterns")
+ "</h2>"
+ "<p>Normally, there is one line containing an example in each Native Example cell. "
+ (!isRTL
? ""
: "However, two examples are provided if the locale is right-to-left, like Arabic or Hebrew, "
+ "<i>and</i> the paragraph direction can cause a different display. "
+ "The first has a <b>RTL</b> paragraph direction, "
+ "while the second has a <b>auto</b> paragraph direction (LTR unless the first 'strong' character is RTL) "
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

a --> an (before auto)

+ ltrSpan
+ "<i>and</i> a different background"
+ spanEnd
+ ". If the display of either example causes strings of letters or numbers to collide, "
+ "then a ⚠️ is shown. ")
+ "When an example has hidden characters, then "
+ tableSpan
+ "an extra line"
+ spanEnd
+ " shows those characters "
+ "such as ❰RLM❱ for the invisible Right-to-Left Mark. "
+ "So that the ordering of the characters in memory is clear, they are presented left-to-right one at a time. "
+ "so that the placement is clear. "
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"so that the placement is clear. " looks like leftover variant, delete?

+ "When a pattern (or a component of a pattern) is missing, it is displayed as "
+ MISSING_PART
+ ".</p>"
+ "\n<table class='dtf-table'>");
Diff diff = new Diff();
boolean is24h = generator.getDefaultHourFormatChar() == 'H';
showRow(
Expand All @@ -502,7 +550,7 @@ public void addTable(DateTimeFormats comparison, Appendable output) {
FIELDS_TITLE,
"Skeleton",
"English Example",
"Native Example (neutral context,<br>then RTL if relevant)",
"Native Example",
false);
for (String[] nameAndSkeleton : NAME_AND_PATTERN) {
String name = nameAndSkeleton[0];
Expand Down Expand Up @@ -615,12 +663,79 @@ private String getExample(String skeleton) {
}
}
String transformedExample = TransliteratorUtilities.toHTML.transform(example);
if (isRTL || BIDI_MARKS.containsSome(transformedExample)) {
transformedExample += exampleSep + rtlStart + transformedExample + rtlEnd;
if ((isRTL || BIDI_MARKS.containsSome(example)) && !example.contains(MISSING_PART)) {
Bidi bidiLTR = new Bidi(example, Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT);
String orderedLTR = bidiLTR.writeReordered(0);
Bidi bidiRTL = new Bidi(example, Bidi.DIRECTION_RIGHT_TO_LEFT);
String orderedRTL = bidiRTL.writeReordered(0);
if (!orderedLTR.equals(orderedRTL)) {
// since this is RTL, we put it first
String rtlVersion = rtlStart + transformedExample + divEnd; // not colored
String autoVersion = autoLtrStart + transformedExample + divEnd; // colored
Set<String> fieldsLTR = getFields(orderedLTR);
Set<String> fieldsRTL = getFields(orderedRTL);
String alert = fieldsLTR.equals(fieldsRTL) ? "" : " ⚠️ ";
transformedExample = rtlVersion + autoVersion + alert;
} else {
String autoVersion = autoStart + transformedExample + divEnd; // not colored
transformedExample = autoVersion;
}
}

if (TO_ESCAPE.containsSome(example)) {
StringBuilder processed = new StringBuilder();
example.codePoints()
.forEach(
x -> {
processed
.append("<td>")
.append(
TransliteratorUtilities.toHTML.transform(
CodePointEscaper.getEscaped(x, TO_ESCAPE)))
.append("</td>");
});

transformedExample += "<table " + tableStyle + "><tr>" + processed + "</tr></table>";
}
return transformedExample;
}

/** Number code points ({@code \p{N}}). */
static final UnicodeSet NUMBERS = new UnicodeSet("\\p{N}").freeze();

/** Letter and mark code points ({@code [\p{L}\p{M}]}). */
static final UnicodeSet LETTERS_MARKS = new UnicodeSet("[\\p{L}\\p{M}]").freeze();

/** Every code point that is in neither {@link #NUMBERS} nor {@link #LETTERS_MARKS}. */
static final UnicodeSet OTHERS =
        new UnicodeSet(NUMBERS).addAll(LETTERS_MARKS).complement().freeze();

/**
 * The three character classes used to split a string into fields, in the order in which they
 * are tried. Each code point belongs to exactly one of them.
 */
static final Set<UnicodeSet> ALL = ImmutableSet.of(NUMBERS, LETTERS_MARKS, OTHERS);

/**
 * Returns the set of fields in a string, where each field is a maximal run of code points that
 * all belong to the same class:
 *
 * <ul>
 *   <li>numbers ({@code \p{N}})
 *   <li>letters &amp; marks ({@code [\p{L}\p{M}]})
 *   <li>other (everything else)
 * </ul>
 *
 * <p>The result is a set, so a field that occurs more than once is recorded once, and two
 * results compare equal regardless of the order in which the fields occur. Insertion order is
 * kept only because it helps with debugging.
 *
 * @param ordered the string to split, for example the output of {@code Bidi.writeReordered}
 * @return the distinct fields of {@code ordered}; empty if {@code ordered} is empty
 * @throws IllegalStateException if no class in {@link #ALL} matches at some position; this
 *     cannot happen as long as the classes cover every code point
 */
private static Set<String> getFields(String ordered) {
    Set<String> fields = new LinkedHashSet<>();
    int start = 0;
    while (start < ordered.length()) {
        int end = start;
        for (UnicodeSet charClass : ALL) {
            end = charClass.span(ordered, start, SpanCondition.CONTAINED);
            if (end != start) {
                break; // this class matched at least one code point
            }
        }
        if (end == start) {
            // Guard against looping forever should ALL ever stop covering all code points.
            throw new IllegalStateException(
                    "No character class matches at index " + start + " of: " + ordered);
        }
        fields.add(ordered.substring(start, end));
        start = end;
    }
    return fields;
}

/**
 * Pattern for strings that start with '®', followed by: one or more lowercase ASCII words (each
 * optionally followed by a hyphen and another lowercase word; group 1 holds the last repetition),
 * an optional further hyphenated lowercase word (group 2), an optionally signed integer
 * (group 3), and optional trailing ASCII letters (group 4).
 *
 * <p>NOTE(review): presumably this is the syntax of the relative-date skeletons handled by
 * RelativePattern; confirm against its usage.
 */
static final Pattern RELATIVE_DATE =
PatternCache.get("®([a-z]+(?:-[a-z]+)?)+(-[a-z]+)?([+-]?\\d+)([a-zA-Z]+)?");

Expand Down Expand Up @@ -677,7 +792,7 @@ private String getRelativeExampleFromSkeleton(String skeleton) {
RelativePattern rp = new RelativePattern(file, skeleton);
String value = rp.value;
if (value == null) {
value = "ⓜⓘⓢⓢⓘⓝⓖ";
value = MISSING_PART;
} else {
DecimalFormat format = icuServiceBuilder.getNumberFormat(0);
value = value.replace("{0}", format.format(Math.abs(rp.offset)).replace("'", "''"));
Expand Down Expand Up @@ -988,10 +1103,9 @@ public static void main(String[] args) throws IOException {
String organization = MyOptions.organization.option.getValue();
String filter = MyOptions.filter.option.getValue();

Factory englishFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, filter);
CLDRFile englishFile = englishFactory.make("en", true);
CLDRFile englishFile = CONFIG.getEnglish();

Factory factory = Factory.make(CLDRPaths.MAIN_DIRECTORY, LOCALES);
Factory factory = Factory.make(CLDRPaths.MAIN_DIRECTORY, filter);
System.out.println("Total locales: " + factory.getAvailableLanguages().size());
DateTimeFormats english = new DateTimeFormats().set(englishFile, "gregorian");

Expand All @@ -1004,7 +1118,7 @@ public static void main(String[] args) throws IOException {
Map<String, String> sorted = new TreeMap<>();
SupplementalDataInfo sdi = SupplementalDataInfo.getInstance();
Set<String> defaultContent = sdi.getDefaultContentLocales();
for (String localeID : factory.getAvailableLanguages()) {
for (String localeID : factory.getAvailable()) {
Level level = StandardCodes.make().getLocaleCoverageLevel(organization, localeID);
if (Level.MODERN.compareTo(level) > 0) {
continue;
Expand Down Expand Up @@ -1047,7 +1161,7 @@ public static void main(String[] args) throws IOException {
+ name
+ "</h1>"
+ "<p><a href='index.html'>Index</a></p>\n"
+ "<p>The following chart shows typical usage of date and time formatting with the Gregorian calendar. "
+ "<p>The following chart shows typical usage of date and time formatting with the Gregorian calendar and default number system. "
+ "<i>There is important information on <a target='CLDR_ST_DOCS' href='http://cldr.unicode.org/translation/date-time-review'>Date/Time Review</a>, "
+ "so please read that page before starting!</i></p>\n");
formats.addTable(english, out);
Expand Down
Loading