Skip to content

Commit

Permalink
CLDR-17395 update scriptToExemplars.txt from tests (#3760)
Browse files Browse the repository at this point in the history
  • Loading branch information
srl295 authored May 30, 2024
1 parent 3568dce commit 2d0dc86
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ public static final Path getDtd2MdDirectory() {

/**
 * Path obtained by passing {@code CLDRPaths.BASE_DIRECTORY} and {@code "exemplars/" + MAIN_SUBDIR}
 * to {@code CldrUtility.getPath}.
 */
public static final String EXEMPLARS_DIRECTORY =
CldrUtility.getPath(CLDRPaths.BASE_DIRECTORY, "exemplars/" + MAIN_SUBDIR);
/**
 * Path of the {@code org/unicode/cldr/util} resource directory in the cldr-code source tree
 * ({@code tools/cldr-code/src/main/resources/...}), built from {@code BASE_DIRECTORY} via {@code
 * CldrUtility.getPath}.
 *
 * <p>{@code ScriptToExemplars.write} uses this as the parent directory of the data file it
 * rewrites.
 */
public static final String UTIL_SRC_DATA_DIR =
CldrUtility.getPath(
BASE_DIRECTORY, "tools/cldr-code/src/main/resources/org/unicode/cldr/util");
public static final String BIRTH_DATA_DIR =
CldrUtility.getPath(
BASE_DIRECTORY,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,29 @@
import com.google.common.collect.Maps;
import com.ibm.icu.text.UnicodeSet;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

public class ScriptToExemplars {
public static final String FILE_PATH = "data/locales/scriptToExemplars.txt";

/**
 * Looks up the exemplar set recorded for a script by delegating to the loader singleton.
 *
 * @param script the script key to look up
 * @return whatever the loader's own {@code getExemplars} returns for that key
 */
public static UnicodeSet getExemplars(String script) {
    final ScriptToExemplarsLoader loader = ScriptToExemplarsLoader.SINGLETON;
    return loader.getExemplars(script);
}

/**
 * Rebuilds the comment header that the loader collected while reading the data file.
 *
 * @return the stored comment lines joined with {@code '\n'}, followed by one trailing {@code
 *     '\n'}
 */
private static String getCommentBlock() {
    final String[] commentLines = ScriptToExemplarsLoader.SINGLETON.comments;
    final String joined = String.join("\n", commentLines);
    return joined.concat("\n");
}

private static class ScriptToExemplarsLoader {
private static final ScriptToExemplarsLoader SINGLETON = new ScriptToExemplarsLoader();
private Map<String, UnicodeSet> data;
private String[] comments;

private UnicodeSet getExemplars(String script) {
UnicodeSet result = data.get(script);
Expand All @@ -25,13 +36,15 @@ private UnicodeSet getExemplars(String script) {

{
Map<String, UnicodeSet> _data = Maps.newTreeMap();
try (BufferedReader reader =
FileReaders.openFile(
ScriptToExemplars.class, "data/locales/scriptToExemplars.txt")) {
List<String> _comments = new ArrayList<String>();
try (BufferedReader reader = FileReaders.openFile(ScriptToExemplars.class, FILE_PATH)) {
Iterable<String> rlsi =
With.toIterable(new FileReaders.ReadLineSimpleIterator(reader));
for (String line : rlsi) {
if (line.isBlank() || line.startsWith("#")) {
if (line.isBlank()) {
continue;
} else if (line.startsWith("#")) {
_comments.add(line.trim());
continue;
}
Iterator<String> parts = Splitter.on(';').trimResults().split(line).iterator();
Expand All @@ -44,6 +57,33 @@ private UnicodeSet getExemplars(String script) {
throw new RuntimeException(e);
}
data = ImmutableMap.copyOf(_data);
comments = _comments.toArray(new String[_comments.size()]);
}
}

/**
 * Rewrites the scriptToExemplars data file under {@code CLDRPaths.UTIL_SRC_DATA_DIR} from the
 * supplied map. Called by LikelySubtagsTest.testGetResolvedScriptVsExemplars.
 *
 * <p>The output starts with the comment block taken from the currently loaded file, followed by
 * one line per non-empty set in the form {@code script ;<TAB>size ;<TAB>pattern}. Entries whose
 * set is empty are skipped.
 *
 * @param expected script key to exemplar set, written in the map's iteration order
 */
public static void write(Map<String, UnicodeSet> expected) {
    final File target = new File(CLDRPaths.UTIL_SRC_DATA_DIR, FILE_PATH);
    try (TempPrintWriter writer = new TempPrintWriter(target)) {
        // Header first: the block already ends in '\n' and is handed to println as-is.
        writer.println(getCommentBlock());

        // Then one data row per script that actually has exemplars.
        expected.forEach(
                (script, exemplars) -> {
                    if (exemplars.isEmpty()) {
                        return;
                    }
                    final String row =
                            String.join(
                                    " ;\t",
                                    script,
                                    String.valueOf(exemplars.size()),
                                    exemplars.toPattern(false));
                    writer.println(row);
                });

        // NOTE(review): this is reported while the writer is still open — confirm whether
        // TempPrintWriter only finalizes the target file on close.
        final String notice =
                "Wrote: "
                        + target.getAbsolutePath()
                        + "\n Please check it carefully and commit it if needed.";
        System.err.println(notice);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
# - Contains a test that verifies that this data correspond to the derivation from the CLDR data.
# - Also has the details of the derivation from CLDR data, and shows how to refresh this data, if CLDR data changes.
# - NOTE: We may tweak the derivation over time.
#
# To update this file, run LikelySubtagsTest.java with -DSHOW_EXEMPLARS.
# The file will be updated in place; review it carefully and check it in.

Arab ; 106 ; [ؠ-ئةثحخذص-غؽف-كه-ْٰٔٙٛٲٴٶٹ-ځڃ-ڊڌڍڏڑړڕږژ-ښڠڢڤڦک-ګڭگڱڳڵں-ھہۃۄۆ-ېےەݙݜݢݨݲ]
Beng ; 12 ; [ঌরঽৄৎৗৠ-ৣৰৱ]
Cans ; 133 ; [ᐁᐍᐏᐑᐓᐕᐘᐚᐟᐠᐢ-ᐤᐦᐨᐩᐯᐻᐽᐿᑁᑃᑅᑇᑉᑊᑌᑘᑚᑜᑞᑠᑢᑤᑦᑫᑵᑷᑹᑻᑽᑿᒁᒃᒉᒓᒕᒗᒙᒛᒝᒟᒡᒣᒭᒯᒱᒳᒵᒷᒹᒻᒼᓀᓊᓌᓎᓐᓕ-ᓘᓚᓛᓪᓭᓷᓹᓻᓽᓿᔁᔃᔅᔦᔰᔲᔴᔶᔸᔺᔼᔾᕆ-ᕉᕋᕌᕐᕕ-ᕚᕝᕿ-ᖃᖅᖏᖑ-ᖖᖠ-ᖦᙱ-ᙶ]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -595,21 +595,10 @@ public void testGetResolvedScriptVsExemplars() {

if (!problemScripts.isEmpty()) {
warnln(
"Adjust the data in scriptToExemplars.txt. Use -DSHOW_EXEMPLARS to get a fresh copy, or reset to expected value for: "
"Adjust the data in scriptToExemplars.txt. Use -DSHOW_EXEMPLARS to update, or reset to expected value for: "
+ problemScripts);
if (SHOW_EXEMPLARS) {
for (Entry<String, UnicodeSet> entry : expected.entrySet()) {
String script = entry.getKey();
UnicodeSet flattened = entry.getValue();
if (!flattened.isEmpty()) {
System.out.println(
script
+ " ;\t"
+ flattened.size()
+ " ;\t"
+ flattened.toPattern(false));
}
}
ScriptToExemplars.write(expected);
}
}
}
Expand Down

0 comments on commit 2d0dc86

Please sign in to comment.