From a9bf06a635a97abe5b3affb2f505e4264be57cf1 Mon Sep 17 00:00:00 2001 From: "Steven R. Loomis" Date: Tue, 28 May 2024 19:22:59 -0500 Subject: [PATCH] CLDR-17395 update scriptToExemplars.txt from tests - change tests to update scriptToExemplars.txt when -DSHOW_EXEMPLARS is set - update scriptToExemplars.txt with instructions --- .../java/org/unicode/cldr/util/CLDRPaths.java | 3 ++ .../unicode/cldr/util/ScriptToExemplars.java | 48 +++++++++++++++++-- .../util/data/locales/scriptToExemplars.txt | 4 ++ .../cldr/unittest/LikelySubtagsTest.java | 15 +----- 4 files changed, 53 insertions(+), 17 deletions(-) diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRPaths.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRPaths.java index 9388c91a3a9..97a7dbc1867 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRPaths.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRPaths.java @@ -88,6 +88,9 @@ public static final Path getDtd2MdDirectory() { public static final String EXEMPLARS_DIRECTORY = CldrUtility.getPath(CLDRPaths.BASE_DIRECTORY, "exemplars/" + MAIN_SUBDIR); + public static final String UTIL_SRC_DATA_DIR = + CldrUtility.getPath( + BASE_DIRECTORY, "tools/cldr-code/src/main/resources/org/unicode/cldr/util"); public static final String BIRTH_DATA_DIR = CldrUtility.getPath( BASE_DIRECTORY, diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/ScriptToExemplars.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/ScriptToExemplars.java index a86377afeeb..6fe84de7bed 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/ScriptToExemplars.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/ScriptToExemplars.java @@ -5,18 +5,29 @@ import com.google.common.collect.Maps; import com.ibm.icu.text.UnicodeSet; import java.io.BufferedReader; +import java.io.File; import java.io.IOException; +import java.util.ArrayList; import java.util.Iterator; +import java.util.List; import java.util.Map; public class ScriptToExemplars { + public static final String FILE_PATH = "data/locales/scriptToExemplars.txt"; + public static UnicodeSet getExemplars(String script) { return ScriptToExemplarsLoader.SINGLETON.getExemplars(script); } + /** return the comment block from the original file */ + private static String getCommentBlock() { + return String.join("\n", ScriptToExemplarsLoader.SINGLETON.comments) + '\n'; + } + private static class ScriptToExemplarsLoader { private static final ScriptToExemplarsLoader SINGLETON = new ScriptToExemplarsLoader(); private Map data; + private String[] comments; private UnicodeSet getExemplars(String script) { UnicodeSet result = data.get(script); @@ -25,13 +36,15 @@ private UnicodeSet getExemplars(String script) { { Map _data = Maps.newTreeMap(); - try (BufferedReader reader = - FileReaders.openFile( - ScriptToExemplars.class, "data/locales/scriptToExemplars.txt")) { + List _comments = new ArrayList(); + try (BufferedReader reader = FileReaders.openFile(ScriptToExemplars.class, FILE_PATH)) { Iterable rlsi = With.toIterable(new FileReaders.ReadLineSimpleIterator(reader)); for (String line : rlsi) { - if (line.isBlank() || line.startsWith("#")) { + if (line.isBlank()) { + continue; + } else if (line.startsWith("#")) { + _comments.add(line.trim()); continue; } Iterator parts = Splitter.on(';').trimResults().split(line).iterator(); @@ -44,6 +57,33 @@ private UnicodeSet getExemplars(String script) { throw new RuntimeException(e); } data = ImmutableMap.copyOf(_data); + comments = _comments.toArray(new String[_comments.size()]); + } + } + + /** Called by LikelySubtagsTest.testGetResolvedScriptVsExemplars */ + public static void write(Map expected) { + final File file = new File(CLDRPaths.UTIL_SRC_DATA_DIR, FILE_PATH); + try (TempPrintWriter out = new TempPrintWriter(file)) { + // copy all comment lines in the file + out.println(getCommentBlock()); + // copy all updated sets + for (Map.Entry entry : expected.entrySet()) { + String script = entry.getKey(); + UnicodeSet flattened = entry.getValue(); + if (!flattened.isEmpty()) { + out.println( + script + + " ;\t" + + flattened.size() + + " ;\t" + + flattened.toPattern(false)); + } + } + System.err.println( + "Wrote: " + + file.getAbsolutePath() + + "\n Please check it carefully and commit it if needed."); } } } diff --git a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/locales/scriptToExemplars.txt b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/locales/scriptToExemplars.txt index ef9aa12db52..61fb40a62fb 100644 --- a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/locales/scriptToExemplars.txt +++ b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/locales/scriptToExemplars.txt @@ -10,6 +10,10 @@ # - Contains a test that verifies that this data correspond to the derivation from the CLDR data. # - Also has the details of the derivation from CLDR data, and shows how to refresh this data, if CLDR data changes. # - NOTE: We may tweak the derivation over time. +# +# To update this file, run LikelySubtagsTest.java with -DSHOW_EXEMPLARS +# The file will be updated in place, review carefully and check in. + Arab ; 106 ; [ؠ-ئةثحخذص-غؽف-كه-ْٰٔٙٛٲٴٶٹ-ځڃ-ڊڌڍڏڑړڕږژ-ښڠڢڤڦک-ګڭگڱڳڵں-ھہۃۄۆ-ېےەݙݜݢݨݲ] Beng ; 12 ; [ঌরঽৄৎৗৠ-ৣৰৱ] Cans ; 133 ; [ᐁᐍᐏᐑᐓᐕᐘᐚᐟᐠᐢ-ᐤᐦᐨᐩᐯᐻᐽᐿᑁᑃᑅᑇᑉᑊᑌᑘᑚᑜᑞᑠᑢᑤᑦᑫᑵᑷᑹᑻᑽᑿᒁᒃᒉᒓᒕᒗᒙᒛᒝᒟᒡᒣᒭᒯᒱᒳᒵᒷᒹᒻᒼᓀᓊᓌᓎᓐᓕ-ᓘᓚᓛᓪᓭᓷᓹᓻᓽᓿᔁᔃᔅᔦᔰᔲᔴᔶᔸᔺᔼᔾᕆ-ᕉᕋᕌᕐᕕ-ᕚᕝᕿ-ᖃᖅᖏᖑ-ᖖᖠ-ᖦᙱ-ᙶ] diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/LikelySubtagsTest.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/LikelySubtagsTest.java index 7e8b53db97e..6bfa2a607ff 100644 --- a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/LikelySubtagsTest.java +++ b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/LikelySubtagsTest.java @@ -595,21 +595,10 @@ public void testGetResolvedScriptVsExemplars() { if (!problemScripts.isEmpty()) { warnln( - "Adjust the data in scriptToExemplars.txt. Use -DSHOW_EXEMPLARS to get a fresh copy, or reset to expected value for: " + "Adjust the data in scriptToExemplars.txt. Use -DSHOW_EXEMPLARS to update, or reset to expected value for: " + problemScripts); if (SHOW_EXEMPLARS) { - for (Entry entry : expected.entrySet()) { - String script = entry.getKey(); - UnicodeSet flattened = entry.getValue(); - if (!flattened.isEmpty()) { - System.out.println( - script - + " ;\t" - + flattened.size() - + " ;\t" - + flattened.toPattern(false)); - } - } + ScriptToExemplars.write(expected); } } }