Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLDR-17395 update scriptToExemplars.txt from tests #3760

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ public static final Path getDtd2MdDirectory() {

public static final String EXEMPLARS_DIRECTORY =
CldrUtility.getPath(CLDRPaths.BASE_DIRECTORY, "exemplars/" + MAIN_SUBDIR);
public static final String UTIL_SRC_DATA_DIR =
CldrUtility.getPath(
BASE_DIRECTORY, "tools/cldr-code/src/main/resources/org/unicode/cldr/util");
public static final String BIRTH_DATA_DIR =
CldrUtility.getPath(
BASE_DIRECTORY,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,29 @@
import com.google.common.collect.Maps;
import com.ibm.icu.text.UnicodeSet;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

public class ScriptToExemplars {
public static final String FILE_PATH = "data/locales/scriptToExemplars.txt";

/**
 * Looks up the exemplar set for a script by delegating to the loader singleton.
 *
 * @param script the script code to look up
 * @return whatever the loader holds for {@code script}
 */
public static UnicodeSet getExemplars(String script) {
    final ScriptToExemplarsLoader loader = ScriptToExemplarsLoader.SINGLETON;
    return loader.getExemplars(script);
}

/**
 * Returns the comment lines collected from the original file, joined with newlines and
 * terminated by one trailing newline.
 */
private static String getCommentBlock() {
    final String[] commentLines = ScriptToExemplarsLoader.SINGLETON.comments;
    final String joined = String.join("\n", commentLines);
    return joined + '\n';
}

private static class ScriptToExemplarsLoader {
private static final ScriptToExemplarsLoader SINGLETON = new ScriptToExemplarsLoader();
private Map<String, UnicodeSet> data;
private String[] comments;

private UnicodeSet getExemplars(String script) {
UnicodeSet result = data.get(script);
Expand All @@ -25,13 +36,15 @@ private UnicodeSet getExemplars(String script) {

{
Map<String, UnicodeSet> _data = Maps.newTreeMap();
try (BufferedReader reader =
FileReaders.openFile(
ScriptToExemplars.class, "data/locales/scriptToExemplars.txt")) {
List<String> _comments = new ArrayList<String>();
try (BufferedReader reader = FileReaders.openFile(ScriptToExemplars.class, FILE_PATH)) {
Iterable<String> rlsi =
With.toIterable(new FileReaders.ReadLineSimpleIterator(reader));
for (String line : rlsi) {
if (line.isBlank() || line.startsWith("#")) {
if (line.isBlank()) {
continue;
} else if (line.startsWith("#")) {
_comments.add(line.trim());
continue;
}
Iterator<String> parts = Splitter.on(';').trimResults().split(line).iterator();
Expand All @@ -44,6 +57,33 @@ private UnicodeSet getExemplars(String script) {
throw new RuntimeException(e);
}
data = ImmutableMap.copyOf(_data);
comments = _comments.toArray(new String[_comments.size()]);
}
}

/**
 * Rewrites the exemplars data file under {@link CLDRPaths#UTIL_SRC_DATA_DIR}: first the comment
 * block taken from the currently loaded file, then one line per script whose set is non-empty,
 * in the form {@code script ;<TAB>size ;<TAB>pattern}.
 *
 * <p>Called by LikelySubtagsTest.testGetResolvedScriptVsExemplars
 *
 * @param expected map from script code to the exemplar set to record; entries with an empty
 *     set are omitted from the output
 */
public static void write(Map<String, UnicodeSet> expected) {
    final File target = new File(CLDRPaths.UTIL_SRC_DATA_DIR, FILE_PATH);
    try (TempPrintWriter out = new TempPrintWriter(target)) {
        // Header: carry over every comment line from the existing file.
        out.println(getCommentBlock());

        // Body: one row per script that actually has exemplars.
        for (Map.Entry<String, UnicodeSet> row : expected.entrySet()) {
            final UnicodeSet exemplars = row.getValue();
            if (exemplars.isEmpty()) {
                continue;
            }
            final String line =
                    row.getKey()
                            + " ;\t"
                            + exemplars.size()
                            + " ;\t"
                            + exemplars.toPattern(false);
            out.println(line);
        }

        // NOTE(review): this is reported while the writer is still open; confirm that is the
        // intended point relative to when TempPrintWriter finalizes the target file.
        final String notice =
                "Wrote: "
                        + target.getAbsolutePath()
                        + "\n Please check it carefully and commit it if needed.";
        System.err.println(notice);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
# - Contains a test that verifies that this data corresponds to the derivation from the CLDR data.
# - Also has the details of the derivation from CLDR data, and shows how to refresh this data, if CLDR data changes.
# - NOTE: We may tweak the derivation over time.
#
# To update this file, run LikelySubtagsTest.java with -DSHOW_EXEMPLARS
# The file will be updated in place; review it carefully and check it in.

Arab ; 106 ; [ؠ-ئةثحخذص-غؽف-كه-ْٰٔٙٛٲٴٶٹ-ځڃ-ڊڌڍڏڑړڕږژ-ښڠڢڤڦک-ګڭگڱڳڵں-ھہۃۄۆ-ېےەݙݜݢݨݲ]
Beng ; 12 ; [ঌরঽৄৎৗৠ-ৣৰৱ]
Cans ; 133 ; [ᐁᐍᐏᐑᐓᐕᐘᐚᐟᐠᐢ-ᐤᐦᐨᐩᐯᐻᐽᐿᑁᑃᑅᑇᑉᑊᑌᑘᑚᑜᑞᑠᑢᑤᑦᑫᑵᑷᑹᑻᑽᑿᒁᒃᒉᒓᒕᒗᒙᒛᒝᒟᒡᒣᒭᒯᒱᒳᒵᒷᒹᒻᒼᓀᓊᓌᓎᓐᓕ-ᓘᓚᓛᓪᓭᓷᓹᓻᓽᓿᔁᔃᔅᔦᔰᔲᔴᔶᔸᔺᔼᔾᕆ-ᕉᕋᕌᕐᕕ-ᕚᕝᕿ-ᖃᖅᖏᖑ-ᖖᖠ-ᖦᙱ-ᙶ]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,7 @@
}

if (!collectedBad.isEmpty()) {
warnln(

Check warning on line 550 in tools/cldr-code/src/test/java/org/unicode/cldr/unittest/LikelySubtagsTest.java

View workflow job for this annotation

GitHub Actions / build

(LikelySubtagsTest.java:550) Warning: Locales have 224 unexpected characters in main and/or aux: [҂״܀-܍०-९৲-৺੦-੯૰౦-౯೦-೯໐-໙၀-၉၏႐-႙႞႟፠-፼᎐-᎙᠐-᠙꒐-꓆𞅏{a\:}{ch’}{e\:}{i\:}{k’}{o\:}{ts’}{tł’}{t’}{à\:}{á\:}{è\:}{é\:}{ì\:}{í\:}{ò\:}{ó\:}{ଅ\:}{ଆ\:}{ଏ\:}]
"Locales have "
+ collectedBad.size()
+ " unexpected characters in main and/or aux:\t"
Expand Down Expand Up @@ -595,21 +595,10 @@

if (!problemScripts.isEmpty()) {
warnln(
"Adjust the data in scriptToExemplars.txt. Use -DSHOW_EXEMPLARS to get a fresh copy, or reset to expected value for: "
"Adjust the data in scriptToExemplars.txt. Use -DSHOW_EXEMPLARS to update, or reset to expected value for: "
+ problemScripts);
if (SHOW_EXEMPLARS) {
for (Entry<String, UnicodeSet> entry : expected.entrySet()) {
String script = entry.getKey();
UnicodeSet flattened = entry.getValue();
if (!flattened.isEmpty()) {
System.out.println(
script
+ " ;\t"
+ flattened.size()
+ " ;\t"
+ flattened.toPattern(false));
}
}
ScriptToExemplars.write(expected);
}
}
}
Expand Down Expand Up @@ -792,7 +781,7 @@
// SUPPLEMENTAL_DATA_INFO.getLikelyOrigins().get(value);
// fieldToOrigin.put(value, origin == null ? "n/a" : origin);
// }
warnln("Bad status=" + entry.getKey() + " for " + entry.getValue());

Check warning on line 784 in tools/cldr-code/src/test/java/org/unicode/cldr/unittest/LikelySubtagsTest.java

View workflow job for this annotation

GitHub Actions / build

(LikelySubtagsTest.java:784) Warning: Bad status=macroregion for [001, 419]
}
}
}
Expand Down
Loading