-
Notifications
You must be signed in to change notification settings - Fork 384
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CLDR-17844 Modify the date report #3920
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
package org.unicode.cldr.util; | ||
|
||
import com.google.common.collect.ImmutableSet; | ||
import com.google.common.collect.Sets; | ||
import com.google.common.collect.Sets.SetView; | ||
import com.ibm.icu.text.Bidi; | ||
import com.ibm.icu.text.UnicodeSet; | ||
import com.ibm.icu.text.UnicodeSet.SpanCondition; | ||
import java.util.ArrayList; | ||
import java.util.Iterator; | ||
import java.util.LinkedHashSet; | ||
import java.util.List; | ||
import java.util.Set; | ||
import java.util.TreeSet; | ||
import java.util.stream.Collectors; | ||
|
||
/** | ||
* A set of utilities for handling BIDI, especially in charts and examples but not restricted to | ||
* that. | ||
*/ | ||
public class BidiUtils { | ||
public static final String ALERT = "⚠️"; | ||
static final String LRM = CodePointEscaper.LRM.getString(); | ||
|
||
// These are intended to be classes of characters that "stick together in order" | ||
// The initial focus is dates, so this will probably need to be expanded for numbers; might need | ||
// more syntax | ||
|
||
private enum SpanClass { | ||
NUMBERS("\\p{N}"), | ||
LETTERS_MARKS("[\\p{L}\\p{M}]"), | ||
DATE_PUNCT("[+]"), | ||
SPACES("\\p{Z}"), | ||
OTHERS("\\p{any}") // must be last, to pick up remainder. | ||
; | ||
final UnicodeSet uset; | ||
|
||
private SpanClass(String unicodeSetSource) { | ||
uset = new UnicodeSet(unicodeSetSource); | ||
} | ||
|
||
static { | ||
// clean up by removing previous values | ||
UnicodeSet soFar = new UnicodeSet(); | ||
for (SpanClass sc : SpanClass.values()) { | ||
sc.uset.removeAll(soFar).freeze(); | ||
soFar.addAll(sc.uset); | ||
} | ||
} | ||
} | ||
/** | ||
* Checks the ordering of the example, under the specified bidiDirectionOptions; | ||
* | ||
* @param example Source text, not HTMLified | ||
* @param outputReorderedResults One string for each specified bidiDirectionOption | ||
* @param bidiDirectionOptions an array of BIDI directions from com.ibm.icu.text.Bidi. if there | ||
* are no items, the default is DIRECTION_DEFAULT_LEFT_TO_RIGHT (dir="auto"), | ||
* DIRECTION_RIGHT_TO_LEFT (dir="rtl"). | ||
* @return true unless two or more of the resulting strings are different. | ||
*/ | ||
public static boolean isOrderingUnchanged( | ||
String example, List<String> outputReorderedResults, int... bidiDirectionOptions) { | ||
boolean hasList = outputReorderedResults != null; | ||
if (!hasList) { | ||
outputReorderedResults = new ArrayList<>(); | ||
} else { | ||
outputReorderedResults.clear(); | ||
} | ||
boolean result = true; | ||
for (int count = 0; count < bidiDirectionOptions.length; ++count) { | ||
String reordered = new Bidi(example, bidiDirectionOptions[count]).writeReordered(0); | ||
outputReorderedResults.add(reordered); | ||
if (result && count != 0 && !reordered.equals(outputReorderedResults.get(0))) { | ||
result = false; | ||
if (!hasList) { | ||
break; // if the output results are not needed, then stop. | ||
} | ||
} | ||
} | ||
return result; | ||
} | ||
|
||
/** | ||
* Return a list of the , where each span is a sequence of: | ||
* | ||
* @param orderedLTR | ||
* @return | ||
*/ | ||
/** | ||
* Gets the 'fields' in a formatted string, used to test whether bidi reordering causes the | ||
* original fields to merge when reordered. Each field is the longest contiguous span of | ||
* characters with the same properties: * | ||
* | ||
* <ul> | ||
* <li>numbers (\p{N}) | ||
* <li>letters & marks ([\p{L}\p{M} | ||
* <li>Other | ||
* </ul> | ||
* | ||
* @param ordered | ||
* @return a set of fields, in the same order as found in the text but duplicates removed (ike | ||
* LinkedHashSeet). | ||
*/ | ||
public static Set<String> getFields(String reordred, Set<String> result) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
int start = 0; | ||
while (start < reordred.length()) { | ||
for (SpanClass sc : SpanClass.values()) { | ||
int end = sc.uset.span(reordred, start, SpanCondition.CONTAINED); | ||
if (end != start) { | ||
result.add(reordred.substring(start, end)); | ||
start = end; | ||
break; | ||
} | ||
} | ||
} | ||
return ImmutableSet.copyOf(result); | ||
} | ||
|
||
/** | ||
* Show when the fields in strings are different | ||
* | ||
* @param bidiReordereds | ||
* @return | ||
*/ | ||
public static String getAlert(List<String> bidiReordereds) { | ||
Set<Set<String>> results = new LinkedHashSet<>(); | ||
for (String bidiReordered : bidiReordereds) { | ||
Set<String> fieldsLTR = BidiUtils.getFields(bidiReordered, new TreeSet<>()); | ||
results.add(fieldsLTR); | ||
} | ||
if (results.size() < 2) { | ||
return ""; | ||
} | ||
// there can still be differences within a field of OTHERS, that we ignore. | ||
// EG ⚠️ 20,28,2B; 2B,28,20 " (+" vs " (+" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 2B,28,20 should be " ( "? |
||
|
||
// show just the difference in the first 2, for now. | ||
Iterator<Set<String>> it = results.iterator(); | ||
Set<String> first = it.next(); | ||
Set<String> second = it.next(); | ||
SetView<String> uniqueFirst = Sets.difference(first, second); | ||
SetView<String> uniqueSecond = Sets.difference(second, first); | ||
return ALERT + " " + escape(uniqueFirst) + "; " + escape(uniqueSecond); | ||
} | ||
|
||
public static String escape(Set<String> uniqueFirst) { | ||
return uniqueFirst.stream() | ||
.map(x -> CodePointEscaper.toEscaped(x)) | ||
.collect(Collectors.joining(LRM + ", " + LRM, LRM, LRM)); | ||
} | ||
|
||
public static String alphagram(String string) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. alphagram seems unused? |
||
return string.codePoints() | ||
.sorted() | ||
.collect( | ||
StringBuilder::new, // Supplier<R> supplier | ||
StringBuilder::appendCodePoint, // ObjIntConsumer<R> accumulator | ||
StringBuilder::append // BiConsumer<R,R> combiner | ||
) | ||
.toString(); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,9 @@ | ||
package org.unicode.cldr.util; | ||
|
||
import com.ibm.icu.impl.UnicodeMap; | ||
import com.ibm.icu.impl.Utility; | ||
import com.ibm.icu.lang.UCharacter; | ||
import com.ibm.icu.text.UTF16; | ||
import com.ibm.icu.text.UnicodeSet; | ||
import java.util.Locale; | ||
|
||
|
@@ -19,10 +21,13 @@ public enum CodePointEscaper { | |
LF(0xA, "line feed"), | ||
CR(0xD, "carriage return"), | ||
SP(0x20, "space", "ASCII space"), | ||
NSP(0x2009, "narrow/thin space", "Also known as ‘thin space’"), | ||
TSP(0x2009, "thin space", "Aka ‘narrow space’"), | ||
NBSP(0xA0, "no-break space", "Same as space, but doesn’t line wrap."), | ||
|
||
NNBSP(0x202F, "narrow/thin no-break space", "Same as narrow space, but doesn’t line wrap."), | ||
NBTSP( | ||
0x202F, | ||
"no-break thin space", | ||
"Same as thin space, but doesn’t line wrap. Aka 'narrow no-break space'"), | ||
|
||
WNJ( | ||
0x200B, | ||
|
@@ -110,9 +115,7 @@ public enum CodePointEscaper { | |
private final String description; | ||
|
||
private CodePointEscaper(int codePoint, String shortName) { | ||
this.codePoint = codePoint; | ||
this.shortName = shortName; | ||
this.description = ""; | ||
this(codePoint, shortName, ""); | ||
} | ||
|
||
private CodePointEscaper(int codePoint, String shortName, String description) { | ||
|
@@ -147,6 +150,11 @@ public int getCodePoint() { | |
return codePoint; | ||
} | ||
|
||
/** Return the string form of the code point for this character. */ | ||
public String getString() { | ||
return UTF16.valueOf(codePoint); | ||
} | ||
|
||
/** Returns the escaped form from the code point for this enum */ | ||
public String codePointToEscaped() { | ||
return ESCAPE_START + rawCodePointToEscaped(codePoint) + ESCAPE_END; | ||
|
@@ -196,6 +204,15 @@ public static String toEscaped(String unescaped, UnicodeSet toEscape) { | |
}); | ||
return result.toString(); | ||
} | ||
|
||
public static String getEscaped(int cp, UnicodeSet toEscape) { | ||
if (!toEscape.contains(cp)) { | ||
return UTF16.valueOf(cp); | ||
} else { | ||
return codePointToEscaped(cp); | ||
} | ||
} | ||
|
||
/** Return unescaped string */ | ||
public static String toUnescaped(String escaped) { | ||
if (escaped == null) { | ||
|
@@ -273,4 +290,54 @@ public static String rawCodePointToEscaped(int codePoint) { | |
? Integer.toString(codePoint, 16).toUpperCase(Locale.ROOT) | ||
: result.toString(); | ||
} | ||
|
||
public static final String getHtmlRows( | ||
UnicodeSet escapesToShow, String tableOptions, String cellOptions) { | ||
if (!escapesToShow.strings().isEmpty()) { | ||
throw new IllegalArgumentException("No strings allowed in the unicode set."); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe also confirm tableOptions and cellOptions start with spaces if not empty -- either throw IllegalArgumentException or insert the spaces if missing |
||
StringBuilder result = new StringBuilder("<table" + tableOptions + ">"); | ||
UnicodeSet remaining = new UnicodeSet(escapesToShow); | ||
String tdPlus = "<td" + cellOptions + ">"; | ||
for (CodePointEscaper cpe : CodePointEscaper.values()) { | ||
int cp = cpe.getCodePoint(); | ||
remaining.remove(cp); | ||
if (escapesToShow.contains(cpe.getCodePoint())) { | ||
final String id = cpe.name(); | ||
final String shortName = cpe.getShortName(); | ||
final String description = cpe.getDescription(); | ||
addREsult(result, tdPlus, id, shortName, description); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is capital E in addREsult a typo? |
||
} | ||
} | ||
for (String cps : remaining) { | ||
int cp = cps.codePointAt(0); | ||
final String extendedName = UCharacter.getExtendedName(cp); | ||
addREsult( | ||
result, | ||
tdPlus, | ||
Utility.hex(cp, 2), | ||
"", | ||
extendedName == null ? "" : extendedName.toLowerCase()); | ||
} | ||
return result.append("</table>").toString(); | ||
} | ||
|
||
public static void addREsult( | ||
StringBuilder result, | ||
String tdPlus, | ||
final String id, | ||
final String shortName, | ||
final String description) { | ||
result.append("<tr>") | ||
.append(tdPlus) | ||
.append(ESCAPE_START) | ||
.append(id) | ||
.append(ESCAPE_END + "</td>") | ||
.append(tdPlus) | ||
.append(shortName) | ||
.append("</td>") | ||
.append(tdPlus) | ||
.append(description) | ||
.append("</td><tr>"); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
word missing before the comma? Actually this whole comment looks unattached to anything, obsoleted by the following comment
Gets the fields