Skip to content

Commit

Permalink
Merge branch main into AEApple-CLDR-17839
Browse files Browse the repository at this point in the history
  • Loading branch information
AEApple committed Aug 5, 2024
2 parents 9178b5e + e302d06 commit cbe6c80
Show file tree
Hide file tree
Showing 7 changed files with 796 additions and 480 deletions.
878 changes: 439 additions & 439 deletions common/main/ak.xml

Large diffs are not rendered by default.

20 changes: 10 additions & 10 deletions common/supplemental/ordinals.xml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2022 Unicode, Inc.
Copyright © 1991-2024 Unicode, Inc.
For terms of use, see http://www.unicode.org/copyright.html
SPDX-License-Identifier: Unicode-3.0
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
Expand Down Expand Up @@ -84,6 +84,15 @@ CLDR data files are interpreted according to the LDML specification (http://unic
<pluralRule count="other"> @integer 0, 6~20, 100, 1000, 10000, 100000, 1000000, …</pluralRule>
</pluralRules>

<!-- 4: zero,one,few,other -->

<pluralRules locales="blo">
<pluralRule count="zero">i = 0 @integer 0</pluralRule>
<pluralRule count="one">i = 1 @integer 1</pluralRule>
<pluralRule count="few">i = 2,3,4,5,6 @integer 2~6</pluralRule>
<pluralRule count="other"> @integer 7~22, 100, 1000, 10000, 100000, 1000000, …</pluralRule>
</pluralRules>

<!-- 4: one,two,few,other -->

<pluralRules locales="en">
Expand Down Expand Up @@ -129,15 +138,6 @@ CLDR data files are interpreted according to the LDML specification (http://unic
<pluralRule count="other"> @integer 9, 10, 19, 29, 30, 39, 49, 59, 69, 79, 109, 1000, 10000, 100000, 1000000, …</pluralRule>
</pluralRules>

<!-- 4: zero,one,few,other -->

<pluralRules locales="blo">
<pluralRule count="zero">i = 0 @integer 0</pluralRule>
<pluralRule count="one">i = 1 @integer 1</pluralRule>
<pluralRule count="few">i = 2,3,4,5,6 @integer 2~6</pluralRule>
<pluralRule count="other"> @integer 7~19, 100, 1000, 10000, 100000, 1000000, …</pluralRule>
</pluralRules>

<!-- 5: one,two,few,many,other -->

<pluralRules locales="gu hi">
Expand Down
2 changes: 1 addition & 1 deletion common/supplemental/pluralRanges.xml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version='1.0' encoding='UTF-8' ?>
<!DOCTYPE supplementalData SYSTEM '../../common/dtd/ldmlSupplemental.dtd'>
<!--
Copyright © 1991-2022 Unicode, Inc.
Copyright © 1991-2024 Unicode, Inc.
For terms of use, see http://www.unicode.org/copyright.html
SPDX-License-Identifier: Unicode-3.0
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
Expand Down
8 changes: 4 additions & 4 deletions common/supplemental/plurals.xml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2022 Unicode, Inc.
Copyright © 1991-2024 Unicode, Inc.
For terms of use, see http://www.unicode.org/copyright.html
SPDX-License-Identifier: Unicode-3.0
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
Expand Down Expand Up @@ -76,15 +76,15 @@ CLDR data files are interpreted according to the LDML specification (http://unic
<pluralRule count="one">i = 0,1 and n != 0 @integer 1 @decimal 0.1~1.6</pluralRule>
<pluralRule count="other"> @integer 2~17, 100, 1000, 10000, 100000, 1000000, … @decimal 2.0~3.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, …</pluralRule>
</pluralRules>
<pluralRules locales="ksh">
<pluralRules locales="blo">
<pluralRule count="zero">n = 0 @integer 0 @decimal 0.0, 0.00, 0.000, 0.0000</pluralRule>
<pluralRule count="one">n = 1 @integer 1 @decimal 1.0, 1.00, 1.000, 1.0000</pluralRule>
<pluralRule count="other"> @integer 2~17, 100, 1000, 10000, 100000, 1000000, … @decimal 0.1~0.9, 1.1~1.7, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, …</pluralRule>
</pluralRules>
<pluralRules locales="blo">
<pluralRules locales="ksh">
<pluralRule count="zero">n = 0 @integer 0 @decimal 0.0, 0.00, 0.000, 0.0000</pluralRule>
<pluralRule count="one">n = 1 @integer 1 @decimal 1.0, 1.00, 1.000, 1.0000</pluralRule>
<pluralRule count="other"> @integer 2~16, 100, 1000, 10000, 100000, 1000000, … @decimal 2.0~2.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, …</pluralRule>
<pluralRule count="other"> @integer 2~17, 100, 1000, 10000, 100000, 1000000, … @decimal 0.1~0.9, 1.1~1.7, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, …</pluralRule>
</pluralRules>

<!-- 3: one,two,other -->
Expand Down
162 changes: 162 additions & 0 deletions tools/cldr-code/src/main/java/org/unicode/cldr/util/BidiUtils.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
package org.unicode.cldr.util;

import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import com.google.common.collect.Sets.SetView;
import com.ibm.icu.text.Bidi;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSet.SpanCondition;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.stream.Collectors;

/**
 * A set of utilities for handling BIDI, especially in charts and examples but not restricted to
 * that.
 */
public class BidiUtils {
    public static final String ALERT = "⚠️";
    static final String LRM = CodePointEscaper.LRM.getString();

    /**
     * Directions used by {@link #isOrderingUnchanged} when the caller supplies none: the
     * equivalents of dir="auto" and dir="rtl".
     */
    private static final int[] DEFAULT_DIRECTION_OPTIONS = {
        Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT, Bidi.DIRECTION_RIGHT_TO_LEFT
    };

    // These are intended to be classes of characters that "stick together in order"
    // The initial focus is dates, so this will probably need to be expanded for numbers; might need
    // more syntax

    private enum SpanClass {
        NUMBERS("\\p{N}"),
        LETTERS_MARKS("[\\p{L}\\p{M}]"),
        DATE_PUNCT("[+]"),
        SPACES("\\p{Z}"),
        OTHERS("\\p{any}") // must be last, to pick up remainder.
    ;
        final UnicodeSet uset;

        SpanClass(String unicodeSetSource) {
            uset = new UnicodeSet(unicodeSetSource);
        }

        static {
            // Make the sets pairwise disjoint: each class drops every code point already claimed
            // by an earlier class, and is then frozen. As a result OTHERS ends up holding exactly
            // the code points that no earlier class contains.
            UnicodeSet soFar = new UnicodeSet();
            for (SpanClass sc : SpanClass.values()) {
                sc.uset.removeAll(soFar).freeze();
                soFar.addAll(sc.uset);
            }
        }
    }

    /**
     * Checks the ordering of the example, under the specified bidiDirectionOptions.
     *
     * @param example Source text, not HTMLified
     * @param outputReorderedResults if non-null, it is cleared and then receives one reordered
     *     string for each direction that was processed, in the order of the directions. May be
     *     null when only the boolean result is needed; in that case processing stops at the first
     *     difference.
     * @param bidiDirectionOptions an array of BIDI directions from com.ibm.icu.text.Bidi. If there
     *     are no items (or the array is null), the default is DIRECTION_DEFAULT_LEFT_TO_RIGHT
     *     (dir="auto"), DIRECTION_RIGHT_TO_LEFT (dir="rtl").
     * @return true unless two or more of the resulting strings are different.
     */
    public static boolean isOrderingUnchanged(
            String example, List<String> outputReorderedResults, int... bidiDirectionOptions) {
        final boolean hasList = outputReorderedResults != null;
        final List<String> reorderedResults;
        if (hasList) {
            outputReorderedResults.clear();
            reorderedResults = outputReorderedResults;
        } else {
            reorderedResults = new ArrayList<>();
        }
        // Apply the documented default when the caller passes no directions; previously the loop
        // simply did not run and the method returned true without checking anything.
        final int[] directions =
                bidiDirectionOptions == null || bidiDirectionOptions.length == 0
                        ? DEFAULT_DIRECTION_OPTIONS
                        : bidiDirectionOptions;

        boolean result = true;
        for (int count = 0; count < directions.length; ++count) {
            String reordered = new Bidi(example, directions[count]).writeReordered(0);
            reorderedResults.add(reordered);
            // Every result is compared against the first one.
            if (result && count != 0 && !reordered.equals(reorderedResults.get(0))) {
                result = false;
                if (!hasList) {
                    break; // if the output results are not needed, then stop.
                }
            }
        }
        return result;
    }

    /**
     * Gets the 'fields' in a formatted string, used to test whether bidi reordering causes the
     * original fields to merge when reordered. Each field is the longest contiguous span of
     * characters belonging to the same class:
     *
     * <ul>
     *   <li>numbers (\p{N})
     *   <li>letters and marks ([\p{L}\p{M}])
     *   <li>date punctuation ([+])
     *   <li>spaces (\p{Z})
     *   <li>other (everything not in one of the classes above)
     * </ul>
     *
     * @param reordred the text to split into fields
     * @param result the set that the fields are added to; it is modified by this call. Its type
     *     determines the ordering and duplicate handling (for example, a LinkedHashSet keeps the
     *     fields in the order found in the text, a TreeSet sorts them).
     * @return an immutable copy of {@code result} after the fields have been added, in the
     *     iteration order of {@code result}
     */
    public static Set<String> getFields(String reordred, Set<String> result) {
        int start = 0;
        // Terminates because the classes together cover every code point (OTHERS picks up the
        // remainder), so some class always produces a non-empty span at 'start'.
        while (start < reordred.length()) {
            for (SpanClass sc : SpanClass.values()) {
                int end = sc.uset.span(reordred, start, SpanCondition.CONTAINED);
                if (end != start) {
                    result.add(reordred.substring(start, end));
                    start = end;
                    break;
                }
            }
        }
        return ImmutableSet.copyOf(result);
    }

    /**
     * Show when the fields in strings are different.
     *
     * @param bidiReordereds the reordered strings to compare, such as the list filled in by {@link
     *     #isOrderingUnchanged}
     * @return the empty string if fewer than two distinct sets of fields are found; otherwise
     *     {@link #ALERT} followed by the escaped fields that are only in the first distinct set,
     *     then "; ", then the escaped fields that are only in the second distinct set
     */
    public static String getAlert(List<String> bidiReordereds) {
        // Equal sets of fields collapse into one entry; insertion order is kept.
        Set<Set<String>> results = new LinkedHashSet<>();
        for (String bidiReordered : bidiReordereds) {
            Set<String> fields = BidiUtils.getFields(bidiReordered, new TreeSet<>());
            results.add(fields);
        }
        if (results.size() < 2) {
            return "";
        }
        // there can still be differences within a field of OTHERS, that we ignore.
        // EG ⚠️ 20,28,2B; 2B,28,20 " (+" vs " (+"

        // show just the difference in the first 2, for now.
        Iterator<Set<String>> it = results.iterator();
        Set<String> first = it.next();
        Set<String> second = it.next();
        SetView<String> uniqueFirst = Sets.difference(first, second);
        SetView<String> uniqueSecond = Sets.difference(second, first);
        return ALERT + " " + escape(uniqueFirst) + "; " + escape(uniqueSecond);
    }

    /**
     * Escapes each string with {@link CodePointEscaper#toEscaped(String)} and joins the results
     * with ", ". An LRM is placed at the start, at the end, and on both sides of each separator.
     *
     * @param uniqueFirst the strings to escape and join
     * @return the joined, escaped strings
     */
    public static String escape(Set<String> uniqueFirst) {
        return uniqueFirst.stream()
                .map(x -> CodePointEscaper.toEscaped(x))
                .collect(Collectors.joining(LRM + ", " + LRM, LRM, LRM));
    }

    /**
     * Returns the code points of the string sorted into ascending code point order.
     *
     * @param string the source text
     * @return a string with the same code points as the input, sorted by code point value
     */
    public static String alphagram(String string) {
        return string.codePoints()
                .sorted()
                .collect(
                        StringBuilder::new, // Supplier<R> supplier
                        StringBuilder::appendCodePoint, // ObjIntConsumer<R> accumulator
                        StringBuilder::append // BiConsumer<R,R> combiner
                        )
                .toString();
    }
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package org.unicode.cldr.util;

import com.ibm.icu.impl.UnicodeMap;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import java.util.Locale;

Expand All @@ -19,10 +21,13 @@ public enum CodePointEscaper {
LF(0xA, "line feed"),
CR(0xD, "carriage return"),
SP(0x20, "space", "ASCII space"),
NSP(0x2009, "narrow/thin space", "Also known as ‘thin space’"),
TSP(0x2009, "thin space", "Aka ‘narrow space’"),
NBSP(0xA0, "no-break space", "Same as space, but doesn’t line wrap."),

NNBSP(0x202F, "narrow/thin no-break space", "Same as narrow space, but doesn’t line wrap."),
NBTSP(
0x202F,
"no-break thin space",
"Same as thin space, but doesn’t line wrap. Aka 'narrow no-break space'"),

WNJ(
0x200B,
Expand Down Expand Up @@ -110,9 +115,7 @@ public enum CodePointEscaper {
private final String description;

private CodePointEscaper(int codePoint, String shortName) {
this.codePoint = codePoint;
this.shortName = shortName;
this.description = "";
this(codePoint, shortName, "");
}

private CodePointEscaper(int codePoint, String shortName, String description) {
Expand Down Expand Up @@ -147,6 +150,11 @@ public int getCodePoint() {
return codePoint;
}

/**
 * Returns the code point of this enum constant as a string, as produced by {@code
 * UTF16.valueOf}.
 *
 * @return the string form of this constant's code point
 */
public String getString() {
    return UTF16.valueOf(this.codePoint);
}

/** Returns the escaped form from the code point for this enum */
public String codePointToEscaped() {
return ESCAPE_START + rawCodePointToEscaped(codePoint) + ESCAPE_END;
Expand Down Expand Up @@ -196,6 +204,15 @@ public static String toEscaped(String unescaped, UnicodeSet toEscape) {
});
return result.toString();
}

/**
 * Returns the form of a single code point to display: its escaped form when the code point is in
 * {@code toEscape}, otherwise the code point itself as a string.
 *
 * @param cp the code point
 * @param toEscape the set of code points that are to be escaped
 * @return the result of {@code codePointToEscaped(cp)} if {@code toEscape} contains {@code cp},
 *     otherwise the string consisting of {@code cp}
 */
public static String getEscaped(int cp, UnicodeSet toEscape) {
    return toEscape.contains(cp) ? codePointToEscaped(cp) : UTF16.valueOf(cp);
}

/** Return unescaped string */
public static String toUnescaped(String escaped) {
if (escaped == null) {
Expand Down Expand Up @@ -273,4 +290,54 @@ public static String rawCodePointToEscaped(int codePoint) {
? Integer.toString(codePoint, 16).toUpperCase(Locale.ROOT)
: result.toString();
}

/**
 * Builds an HTML table with one row per code point in {@code escapesToShow}. Code points that
 * have a {@code CodePointEscaper} constant come first, in enum order, showing the constant's name,
 * short name and description. The remaining code points follow, showing their hex value, an empty
 * short name, and their lowercased extended Unicode name.
 *
 * @param escapesToShow the code points to show; must not contain strings
 * @param tableOptions text inserted verbatim right after {@code <table}; include a leading space
 *     when passing attributes, or pass the empty string
 * @param cellOptions text inserted verbatim right after {@code <td} in every cell; include a
 *     leading space when passing attributes, or pass the empty string
 * @return the HTML for the complete table
 * @throws IllegalArgumentException if {@code escapesToShow} contains strings
 */
public static final String getHtmlRows(
        UnicodeSet escapesToShow, String tableOptions, String cellOptions) {
    if (!escapesToShow.strings().isEmpty()) {
        throw new IllegalArgumentException("No strings allowed in the unicode set.");
    }
    StringBuilder result = new StringBuilder("<table" + tableOptions + ">");
    // Tracks the requested code points that have no enum constant.
    UnicodeSet remaining = new UnicodeSet(escapesToShow);
    String tdPlus = "<td" + cellOptions + ">";
    for (CodePointEscaper cpe : CodePointEscaper.values()) {
        final int cp = cpe.getCodePoint();
        remaining.remove(cp);
        if (escapesToShow.contains(cp)) {
            addREsult(result, tdPlus, cpe.name(), cpe.getShortName(), cpe.getDescription());
        }
    }
    for (String cps : remaining) {
        int cp = cps.codePointAt(0);
        final String extendedName = UCharacter.getExtendedName(cp);
        addREsult(
                result,
                tdPlus,
                Utility.hex(cp, 2),
                "",
                // Locale.ROOT keeps the casing independent of the default locale
                // (for example, Turkish would otherwise map "I" to a dotless "ı").
                extendedName == null ? "" : extendedName.toLowerCase(Locale.ROOT));
    }
    return result.append("</table>").toString();
}

/**
 * Appends one three-cell HTML table row to {@code result}: the id wrapped in the escape
 * delimiters, the short name, and the description. The values are appended as given, without HTML
 * escaping.
 *
 * @param result the builder that the row is appended to
 * @param tdPlus the opening tag to use for each cell, such as {@code <td>}
 * @param id the identifier shown between ESCAPE_START and ESCAPE_END in the first cell
 * @param shortName the text of the second cell
 * @param description the text of the third cell
 */
public static void addREsult(
        StringBuilder result,
        String tdPlus,
        final String id,
        final String shortName,
        final String description) {
    result.append("<tr>")
            .append(tdPlus)
            .append(ESCAPE_START)
            .append(id)
            .append(ESCAPE_END + "</td>")
            .append(tdPlus)
            .append(shortName)
            .append("</td>")
            .append(tdPlus)
            .append(description)
            // Close the row; this previously emitted an opening <tr>, leaving every row unclosed.
            .append("</td></tr>");
}
}
Loading

0 comments on commit cbe6c80

Please sign in to comment.