From d9b2073d932052cdcb836c317c0b91d1e0858d83 Mon Sep 17 00:00:00 2001
From: macchiati <mark@macchiato.com>
Date: Tue, 30 Jul 2024 20:54:44 -0700
Subject: [PATCH 1/3] CLDR-17844 Modify the date report

---
 .../unicode/cldr/util/CodePointEscaper.java   |  22 ++-
 .../unicode/cldr/util/DateTimeFormats.java    | 140 ++++++++++++++++--
 2 files changed, 145 insertions(+), 17 deletions(-)

diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CodePointEscaper.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CodePointEscaper.java
index 711d16895fa..9b6c304024a 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CodePointEscaper.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CodePointEscaper.java
@@ -2,6 +2,7 @@
 
 import com.ibm.icu.impl.UnicodeMap;
 import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
 import java.util.Locale;
 
@@ -19,10 +20,13 @@ public enum CodePointEscaper {
     LF(0xA, "line feed"),
     CR(0xD, "carriage return"),
     SP(0x20, "space", "ASCII space"),
-    NSP(0x2009, "narrow/thin space", "Also known as ‘thin space’"),
+    TSP(0x2009, "thin space", "Aka ‘narrow space’"),
     NBSP(0xA0, "no-break space", "Same as space, but doesn’t line wrap."),
 
-    NNBSP(0x202F, "narrow/thin no-break space", "Same as narrow space, but doesn’t line wrap."),
+    NBTSP(
+            0x202F,
+            "no-break thin space",
+            "Same as thin space, but doesn’t line wrap. Aka 'narrow no-break space'"),
 
     WNJ(
             0x200B,
@@ -147,6 +151,11 @@ public int getCodePoint() {
         return codePoint;
     }
 
+    /** Return the string form of the code point for this character. */
+    public String getString() {
+        return UTF16.valueOf(codePoint);
+    }
+
     /** Returns the escaped form from the code point for this enum */
     public String codePointToEscaped() {
         return ESCAPE_START + rawCodePointToEscaped(codePoint) + ESCAPE_END;
@@ -196,6 +205,15 @@ public static String toEscaped(String unescaped, UnicodeSet toEscape) {
                         });
         return result.toString();
     }
+
+    public static String getEscaped(int cp, UnicodeSet toEscape) {
+        if (!toEscape.contains(cp)) {
+            return UTF16.valueOf(cp);
+        } else {
+            return codePointToEscaped(cp);
+        }
+    }
+
     /** Return unescaped string */
     public static String toUnescaped(String escaped) {
         if (escaped == null) {
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/DateTimeFormats.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/DateTimeFormats.java
index e14b0044046..a7f299a967e 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/DateTimeFormats.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/DateTimeFormats.java
@@ -1,7 +1,9 @@
 package org.unicode.cldr.util;
 
 import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
 import com.ibm.icu.impl.Row.R3;
+import com.ibm.icu.text.Bidi;
 import com.ibm.icu.text.DateFormat;
 import com.ibm.icu.text.DateIntervalFormat;
 import com.ibm.icu.text.DateIntervalInfo;
@@ -13,6 +15,7 @@
 import com.ibm.icu.text.MessageFormat;
 import com.ibm.icu.text.SimpleDateFormat;
 import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.text.UnicodeSet.SpanCondition;
 import com.ibm.icu.util.Calendar;
 import com.ibm.icu.util.DateInterval;
 import com.ibm.icu.util.ICUUncheckedIOException;
@@ -45,6 +48,15 @@
 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
 
 public class DateTimeFormats {
+    private static final UnicodeSet TO_ESCAPE =
+            new UnicodeSet(CodePointEscaper.FORCE_ESCAPE)
+                    .remove(CodePointEscaper.SP.getCodePoint())
+                    .remove(CodePointEscaper.TSP.getCodePoint())
+                    .remove(CodePointEscaper.NBSP.getCodePoint())
+                    .remove(CodePointEscaper.NBTSP.getCodePoint())
+                    .freeze();
+    private static final String MISSING_PART = "ⓜⓘⓢⓢⓘⓝⓖ";
+    private static final CLDRConfig CONFIG = CLDRConfig.getInstance();
     private static final Date SAMPLE_DATE_DEFAULT_END = new Date(2099 - 1900, 0, 13, 14, 45, 59);
     private static final String DIR = CLDRPaths.CHART_DIRECTORY + "/verify/dates/";
     private static SupplementalDataInfo sdi = SupplementalDataInfo.getInstance();
@@ -76,9 +88,19 @@ enum MyOptions {
     // constant sets should
     // probably be moved to a common file of such things.
     private static final UnicodeSet BIDI_MARKS = new UnicodeSet("[:Bidi_Control:]").freeze();
-    private static final String exampleSep = "<br>";
+
+    private static final String ltrBackground = "background-color:#EEE;";
+    private static final String tableBackground = " background-color:#DDF;";
+
     private static final String rtlStart = "<div dir='rtl'>";
-    private static final String rtlEnd = "</div>";
+    private static final String ltrStart = "<div dir='ltr' style='" + ltrBackground + "'>";
+    private static final String divEnd = "</div>";
+    private static final String tableStyle =
+            "style='border-collapse: collapse;" + tableBackground + " margin: auto'";
+
+    private static final String ltrSpan = "<span style='" + ltrBackground + "'>";
+    private static final String tableSpan = "<span style='" + tableBackground + "'>";
+    private static final String spanEnd = "</span>";
 
     private static final String[] STOCK = {"short", "medium", "long", "full"};
     private static final String[] CALENDAR_FIELD_TO_PATTERN_LETTER = {
@@ -125,7 +147,7 @@ enum MyOptions {
     private ULocale locale;
     private ICUServiceBuilder icuServiceBuilder;
     private ICUServiceBuilder icuServiceBuilderEnglish =
-            new ICUServiceBuilder().setCldrFile(CLDRConfig.getInstance().getEnglish());
+            new ICUServiceBuilder().setCldrFile(CONFIG.getEnglish());
 
     private DateIntervalInfo dateIntervalInfo = new DateIntervalInfo();
     private String calendarID;
@@ -133,8 +155,7 @@ enum MyOptions {
     private boolean isRTL;
 
     private static String surveyUrl =
-            CLDRConfig.getInstance()
-                    .getProperty("CLDR_SURVEY_URL", "http://st.unicode.org/cldr-apps/survey");
+            CONFIG.getProperty("CLDR_SURVEY_URL", "http://st.unicode.org/cldr-apps/survey");
 
     /**
      * Set a CLDRFile and calendar. Must be done before calling addTable.
@@ -493,7 +514,33 @@ public boolean isPresent(String skeleton) {
     public void addTable(DateTimeFormats comparison, Appendable output) {
         try {
             output.append(
-                    "<h2>" + hackDoubleLinked("Patterns") + "</h2>\n<table class='dtf-table'>");
+                    "<h2>"
+                            + hackDoubleLinked("Patterns")
+                            + "</h2>"
+                            + "<p>Normally, there is one line containing an example in each Native Example cell. "
+                            + (!isRTL
+                                    ? ""
+                                    : "However, two examples are provided if the locale is right-to-left, like Arabic or Hebrew, "
+                                            + "<i>and</i> the paragraph direction can cause a different display. "
+                                            + "The first has a RTL paragraph direction, "
+                                            + "while the second has a LTR paragraph direction "
+                                            + ltrSpan
+                                            + "<i>and</i> a different background"
+                                            + spanEnd
+                                            + ". If the display of either example causes strings of letters or numbers to collide, "
+                                            + "then a ⚠️ is shown. ")
+                            + "When an example has hidden characters, then "
+                            + tableSpan
+                            + "an extra line"
+                            + spanEnd
+                            + " shows those characters "
+                            + "such as ❰RLM❱ for the invisible Right-to-Left Mark. "
+                            + "So that the ordering of the characters in memory is clear, they are presented left-to-right one at a time. "
+                            + "so that the placement is clear. "
+                            + "When a pattern (or a component of a pattern) is missing, it is displayed as "
+                            + MISSING_PART
+                            + ".</p>"
+                            + "\n<table class='dtf-table'>");
             Diff diff = new Diff();
             boolean is24h = generator.getDefaultHourFormatChar() == 'H';
             showRow(
@@ -502,7 +549,7 @@ public void addTable(DateTimeFormats comparison, Appendable output) {
                     FIELDS_TITLE,
                     "Skeleton",
                     "English Example",
-                    "Native Example (neutral context,<br>then RTL if relevant)",
+                    "Native Example",
                     false);
             for (String[] nameAndSkeleton : NAME_AND_PATTERN) {
                 String name = nameAndSkeleton[0];
@@ -615,12 +662,76 @@ private String getExample(String skeleton) {
             }
         }
         String transformedExample = TransliteratorUtilities.toHTML.transform(example);
-        if (isRTL || BIDI_MARKS.containsSome(transformedExample)) {
-            transformedExample += exampleSep + rtlStart + transformedExample + rtlEnd;
+        if ((isRTL || BIDI_MARKS.containsSome(example)) && !example.contains(MISSING_PART)) {
+            Bidi bidiLTR = new Bidi(example, Bidi.DIRECTION_LEFT_TO_RIGHT);
+            String orderedLTR = bidiLTR.writeReordered(0);
+            Bidi bidiRTL = new Bidi(example, Bidi.DIRECTION_RIGHT_TO_LEFT);
+            String orderedRTL = bidiRTL.writeReordered(0);
+            if (!orderedLTR.equals(orderedRTL)) {
+                // since this is RTL, we put it first
+                String rtlVersion = rtlStart + transformedExample + divEnd;
+                String ltrVersion = ltrStart + transformedExample + divEnd; // colored
+                Set<String> fieldsLTR = getFields(orderedLTR);
+                Set<String> fieldsRTL = getFields(orderedRTL);
+                String alert = fieldsLTR.equals(fieldsRTL) ? "" : " ⚠️ ";
+                transformedExample = rtlVersion + ltrVersion + alert;
+            }
+        }
+
+        if (TO_ESCAPE.containsSome(example)) {
+            StringBuilder processed = new StringBuilder();
+            example.codePoints()
+                    .forEach(
+                            x -> {
+                                processed
+                                        .append("<td>")
+                                        .append(
+                                                TransliteratorUtilities.toHTML.transform(
+                                                        CodePointEscaper.getEscaped(x, TO_ESCAPE)))
+                                        .append("</td>");
+                            });
+
+            transformedExample += "<table " + tableStyle + "><tr>" + processed + "</tr></table>";
         }
         return transformedExample;
     }
 
+    /**
+     * Return a list of the fields, where each span is a sequence of:
+     *
+     * <ul>
+     *   <li>numbers (\p{N})
+     *   <li>letters & marks ([\p{L}\p{M}
+     *   <li>Other
+     * </ul>
+     *
+     * @param orderedLTR
+     * @return
+     */
+    static final UnicodeSet NUMBERS = new UnicodeSet("\\p{N}").freeze();
+
+    static final UnicodeSet LETTERS_MARKS = new UnicodeSet("[\\p{L}\\p{M}]").freeze();
+    static final UnicodeSet OTHERS =
+            new UnicodeSet(NUMBERS).addAll(LETTERS_MARKS).complement().freeze();
+    static final Set<UnicodeSet> ALL = ImmutableSet.of(NUMBERS, LETTERS_MARKS, OTHERS);
+
+    private Set<String> getFields(String ordered) {
+        Set<String> result =
+                new LinkedHashSet<>(); // doesn't have to be a LHS, but helps with debugging
+        int start = 0;
+        while (start < ordered.length()) {
+            for (UnicodeSet us : ALL) {
+                int end = us.span(ordered, start, SpanCondition.CONTAINED);
+                if (end != start) {
+                    result.add(ordered.substring(start, end));
+                    start = end;
+                    break;
+                }
+            }
+        }
+        return result;
+    }
+
     static final Pattern RELATIVE_DATE =
             PatternCache.get("®([a-z]+(?:-[a-z]+)?)+(-[a-z]+)?([+-]?\\d+)([a-zA-Z]+)?");
 
@@ -677,7 +788,7 @@ private String getRelativeExampleFromSkeleton(String skeleton) {
         RelativePattern rp = new RelativePattern(file, skeleton);
         String value = rp.value;
         if (value == null) {
-            value = "ⓜⓘⓢⓢⓘⓝⓖ";
+            value = MISSING_PART;
         } else {
             DecimalFormat format = icuServiceBuilder.getNumberFormat(0);
             value = value.replace("{0}", format.format(Math.abs(rp.offset)).replace("'", "''"));
@@ -988,10 +1099,9 @@ public static void main(String[] args) throws IOException {
         String organization = MyOptions.organization.option.getValue();
         String filter = MyOptions.filter.option.getValue();
 
-        Factory englishFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, filter);
-        CLDRFile englishFile = englishFactory.make("en", true);
+        CLDRFile englishFile = CONFIG.getEnglish();
 
-        Factory factory = Factory.make(CLDRPaths.MAIN_DIRECTORY, LOCALES);
+        Factory factory = Factory.make(CLDRPaths.MAIN_DIRECTORY, filter);
         System.out.println("Total locales: " + factory.getAvailableLanguages().size());
         DateTimeFormats english = new DateTimeFormats().set(englishFile, "gregorian");
 
@@ -1004,7 +1114,7 @@ public static void main(String[] args) throws IOException {
         Map<String, String> sorted = new TreeMap<>();
         SupplementalDataInfo sdi = SupplementalDataInfo.getInstance();
         Set<String> defaultContent = sdi.getDefaultContentLocales();
-        for (String localeID : factory.getAvailableLanguages()) {
+        for (String localeID : factory.getAvailable()) {
             Level level = StandardCodes.make().getLocaleCoverageLevel(organization, localeID);
             if (Level.MODERN.compareTo(level) > 0) {
                 continue;
@@ -1047,7 +1157,7 @@ public static void main(String[] args) throws IOException {
                             + name
                             + "</h1>"
                             + "<p><a href='index.html'>Index</a></p>\n"
-                            + "<p>The following chart shows typical usage of date and time formatting with the Gregorian calendar. "
+                            + "<p>The following chart shows typical usage of date and time formatting with the Gregorian calendar and default number system. "
                             + "<i>There is important information on <a target='CLDR_ST_DOCS' href='http://cldr.unicode.org/translation/date-time-review'>Date/Time Review</a>, "
                             + "so please read that page before starting!</i></p>\n");
             formats.addTable(english, out);

From 2d8ad0b31276f9337e61c02550fe8794a313ce1c Mon Sep 17 00:00:00 2001
From: macchiati <mark@macchiato.com>
Date: Wed, 31 Jul 2024 06:17:09 -0700
Subject: [PATCH 2/3] CLDR-17844 Change to auto vs RTL comparison

---
 .../unicode/cldr/util/DateTimeFormats.java    | 22 +++++++++++--------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/DateTimeFormats.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/DateTimeFormats.java
index a7f299a967e..60d38bbdac8 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/DateTimeFormats.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/DateTimeFormats.java
@@ -90,13 +90,14 @@ enum MyOptions {
     private static final UnicodeSet BIDI_MARKS = new UnicodeSet("[:Bidi_Control:]").freeze();
 
     private static final String ltrBackground = "background-color:#EEE;";
-    private static final String tableBackground = " background-color:#DDF;";
+    private static final String tableBackground = "background-color:#DDF; border: 1px solid blue;";
 
     private static final String rtlStart = "<div dir='rtl'>";
-    private static final String ltrStart = "<div dir='ltr' style='" + ltrBackground + "'>";
+    private static final String autoLtrStart = "<div dir='auto' style='" + ltrBackground + "'>";
+    private static final String autoStart = "<div dir='auto'>";
     private static final String divEnd = "</div>";
     private static final String tableStyle =
-            "style='border-collapse: collapse;" + tableBackground + " margin: auto'";
+            "style='border-collapse: collapse;" + tableBackground + " margin: auto'"; //
 
     private static final String ltrSpan = "<span style='" + ltrBackground + "'>";
     private static final String tableSpan = "<span style='" + tableBackground + "'>";
@@ -522,8 +523,8 @@ public void addTable(DateTimeFormats comparison, Appendable output) {
                                     ? ""
                                     : "However, two examples are provided if the locale is right-to-left, like Arabic or Hebrew, "
                                             + "<i>and</i> the paragraph direction can cause a different display. "
-                                            + "The first has a RTL paragraph direction, "
-                                            + "while the second has a LTR paragraph direction "
+                                            + "The first has a <b>RTL</b> paragraph direction, "
+                                            + "while the second has a <b>auto</b> paragraph direction (LTR unless the first 'strong' character is RTL) "
                                             + ltrSpan
                                             + "<i>and</i> a different background"
                                             + spanEnd
@@ -663,18 +664,21 @@ private String getExample(String skeleton) {
         }
         String transformedExample = TransliteratorUtilities.toHTML.transform(example);
         if ((isRTL || BIDI_MARKS.containsSome(example)) && !example.contains(MISSING_PART)) {
-            Bidi bidiLTR = new Bidi(example, Bidi.DIRECTION_LEFT_TO_RIGHT);
+            Bidi bidiLTR = new Bidi(example, Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT);
             String orderedLTR = bidiLTR.writeReordered(0);
             Bidi bidiRTL = new Bidi(example, Bidi.DIRECTION_RIGHT_TO_LEFT);
             String orderedRTL = bidiRTL.writeReordered(0);
             if (!orderedLTR.equals(orderedRTL)) {
                 // since this is RTL, we put it first
-                String rtlVersion = rtlStart + transformedExample + divEnd;
-                String ltrVersion = ltrStart + transformedExample + divEnd; // colored
+                String rtlVersion = rtlStart + transformedExample + divEnd; // not colored
+                String autoVersion = autoLtrStart + transformedExample + divEnd; // colored
                 Set<String> fieldsLTR = getFields(orderedLTR);
                 Set<String> fieldsRTL = getFields(orderedRTL);
                 String alert = fieldsLTR.equals(fieldsRTL) ? "" : " ⚠️ ";
-                transformedExample = rtlVersion + ltrVersion + alert;
+                transformedExample = rtlVersion + autoVersion + alert;
+            } else {
+                String autoVersion = autoStart + transformedExample + divEnd; // not colored
+                transformedExample = autoVersion;
             }
         }
 

From 595dde363d36cecd07b135d8ec8cef83c1774eb4 Mon Sep 17 00:00:00 2001
From: macchiati <mark@macchiato.com>
Date: Thu, 1 Aug 2024 06:43:41 -0700
Subject: [PATCH 3/3] CLDR-17844 cleanup

---
 .../java/org/unicode/cldr/util/BidiUtils.java | 162 ++++++++++++++++++
 .../unicode/cldr/util/CodePointEscaper.java   |  55 +++++-
 .../unicode/cldr/util/DateTimeFormats.java    |  97 ++++-------
 3 files changed, 249 insertions(+), 65 deletions(-)
 create mode 100644 tools/cldr-code/src/main/java/org/unicode/cldr/util/BidiUtils.java

diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/BidiUtils.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/BidiUtils.java
new file mode 100644
index 00000000000..dc7e9f2761b
--- /dev/null
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/BidiUtils.java
@@ -0,0 +1,162 @@
+package org.unicode.cldr.util;
+
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Sets;
+import com.google.common.collect.Sets.SetView;
+import com.ibm.icu.text.Bidi;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.text.UnicodeSet.SpanCondition;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.stream.Collectors;
+
+/**
+ * A set of utilities for handling BIDI, especially in charts and examples but not restricted to
+ * that.
+ */
+public class BidiUtils {
+    public static final String ALERT = "⚠️";
+    static final String LRM = CodePointEscaper.LRM.getString();
+
+    // These are intended to be classes of characters that "stick together in order"
+    // The initial focus is dates, so this will probably need to be expanded for numbers; might need
+    // more syntax
+
+    private enum SpanClass {
+        NUMBERS("\\p{N}"),
+        LETTERS_MARKS("[\\p{L}\\p{M}]"),
+        DATE_PUNCT("[+]"),
+        SPACES("\\p{Z}"),
+        OTHERS("\\p{any}") // must be last, to pick up remainder.
+    ;
+        final UnicodeSet uset;
+
+        private SpanClass(String unicodeSetSource) {
+            uset = new UnicodeSet(unicodeSetSource);
+        }
+
+        static {
+            // clean up by removing previous values
+            UnicodeSet soFar = new UnicodeSet();
+            for (SpanClass sc : SpanClass.values()) {
+                sc.uset.removeAll(soFar).freeze();
+                soFar.addAll(sc.uset);
+            }
+        }
+    }
+    /**
+     * Checks the ordering of the example, under the specified bidiDirectionOptions;
+     *
+     * @param example Source text, not HTMLified
+     * @param outputReorderedResults One string for each specified bidiDirectionOption
+     * @param bidiDirectionOptions an array of BIDI directions from com.ibm.icu.text.Bidi. if there
+     *     are no items, the default is DIRECTION_DEFAULT_LEFT_TO_RIGHT (dir="auto"),
+     *     DIRECTION_RIGHT_TO_LEFT (dir="rtl").
+     * @return true unless two or more of the resulting strings are different.
+     */
+    public static boolean isOrderingUnchanged(
+            String example, List<String> outputReorderedResults, int... bidiDirectionOptions) {
+        boolean hasList = outputReorderedResults != null;
+        if (!hasList) {
+            outputReorderedResults = new ArrayList<>();
+        } else {
+            outputReorderedResults.clear();
+        }
+        if (bidiDirectionOptions.length == 0) { // documented default: dir="auto", then dir="rtl"
+            bidiDirectionOptions =
+                    new int[] {Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT, Bidi.DIRECTION_RIGHT_TO_LEFT};
+        }
+        boolean result = true;
+        // Stop at the first difference unless the caller wants every reordered string back.
+        for (int count = 0; count < bidiDirectionOptions.length && (result || hasList); ++count) {
+            String reordered = new Bidi(example, bidiDirectionOptions[count]).writeReordered(0);
+            outputReorderedResults.add(reordered);
+            result = result && (count == 0 || reordered.equals(outputReorderedResults.get(0)));
+        }
+        return result;
+    }
+
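+    /**
+     * Gets the 'fields' in a formatted string, used to test whether bidi reordering causes the
+     * original fields to merge when reordered. Each field is the longest contiguous span of
+     * characters in the same class (see SpanClass):
+     *
+     * <ul>
+     *   <li>numbers (\p{N})
+     *   <li>letters &amp; marks ([\p{L}\p{M}])
+     *   <li>date punctuation ([+])
+     *   <li>spaces (\p{Z})
+     *   <li>others (everything else)
+     * </ul>
+     *
+     * <p>The fields are accumulated in {@code result}, so its type determines the ordering: a
+     * LinkedHashSet keeps the order in which the fields are found in the text, while a TreeSet
+     * sorts them. Duplicate fields are only recorded once.
+     *
+     * @param reordred the (bidi-reordered) text to split into fields
+     * @param result a modifiable set that receives the fields; anything already in it is kept
+     * @return an immutable copy of {@code result} after the fields have been added
+     */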
+    public static Set<String> getFields(String reordred, Set<String> result) {
+        int start = 0;
+        while (start < reordred.length()) {
+            for (SpanClass sc : SpanClass.values()) {
+                int end = sc.uset.span(reordred, start, SpanCondition.CONTAINED);
+                if (end != start) {
+                    result.add(reordred.substring(start, end));
+                    start = end;
+                    break;
+                }
+            }
+        }
+        return ImmutableSet.copyOf(result);
+    }
+
+    /**
+     * Builds an alert when the reordered strings do not all split into the same set of fields.
+     *
+     * @param bidiReordereds reordered strings to compare, one per paragraph direction
+     * @return "" if fewer than two distinct field sets occur, else ALERT plus the differences
+     */
+    public static String getAlert(List<String> bidiReordereds) {
+        Set<Set<String>> results = new LinkedHashSet<>();
+        for (String bidiReordered : bidiReordereds) {
+            Set<String> fieldsLTR = BidiUtils.getFields(bidiReordered, new TreeSet<>());
+            results.add(fieldsLTR);
+        }
+        if (results.size() < 2) {
+            return "";
+        }
+        // there can still be differences within a field of OTHERS, that we ignore.
+        // EG ⚠️ 20,28,2B; 2B,28,20 " (+" vs " (+"
+
+        // show just the difference in the first 2, for now.
+        Iterator<Set<String>> it = results.iterator();
+        Set<String> first = it.next();
+        Set<String> second = it.next();
+        SetView<String> uniqueFirst = Sets.difference(first, second);
+        SetView<String> uniqueSecond = Sets.difference(second, first);
+        return ALERT + " " + escape(uniqueFirst) + "; " + escape(uniqueSecond);
+    }
+
+    public static String escape(Set<String> uniqueFirst) {
+        return uniqueFirst.stream()
+                .map(x -> CodePointEscaper.toEscaped(x))
+                .collect(Collectors.joining(LRM + ", " + LRM, LRM, LRM));
+    }
+
+    public static String alphagram(String string) {
+        return string.codePoints()
+                .sorted()
+                .collect(
+                        StringBuilder::new, // Supplier<R> supplier
+                        StringBuilder::appendCodePoint, // ObjIntConsumer<R> accumulator
+                        StringBuilder::append // BiConsumer<R, R> combiner
+                        )
+                .toString();
+    }
+}
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CodePointEscaper.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CodePointEscaper.java
index 9b6c304024a..04d030b7a19 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CodePointEscaper.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CodePointEscaper.java
@@ -1,6 +1,7 @@
 package org.unicode.cldr.util;
 
 import com.ibm.icu.impl.UnicodeMap;
+import com.ibm.icu.impl.Utility;
 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
@@ -114,9 +115,7 @@ public enum CodePointEscaper {
     private final String description;
 
     private CodePointEscaper(int codePoint, String shortName) {
-        this.codePoint = codePoint;
-        this.shortName = shortName;
-        this.description = "";
+        this(codePoint, shortName, "");
     }
 
     private CodePointEscaper(int codePoint, String shortName, String description) {
@@ -291,4 +290,54 @@ public static String rawCodePointToEscaped(int codePoint) {
                 ? Integer.toString(codePoint, 16).toUpperCase(Locale.ROOT)
                 : result.toString();
     }
+
+    public static final String getHtmlRows(
+            UnicodeSet escapesToShow, String tableOptions, String cellOptions) {
+        if (!escapesToShow.strings().isEmpty()) {
+            throw new IllegalArgumentException("No strings allowed in the unicode set.");
+        }
+        StringBuilder result = new StringBuilder("<table" + tableOptions + ">");
+        UnicodeSet remaining = new UnicodeSet(escapesToShow);
+        String tdPlus = "<td" + cellOptions + ">";
+        // First the requested characters that have a named escape, in enum order.
+        for (CodePointEscaper cpe : CodePointEscaper.values()) {
+            int cp = cpe.getCodePoint();
+            remaining.remove(cp);
+            if (escapesToShow.contains(cp)) {
+                addREsult(result, tdPlus, cpe.name(), cpe.getShortName(), cpe.getDescription());
+            }
+        }
+        // Then whatever is left over, identified by its hex code point instead of a short ID.
+        for (String cps : remaining) {
+            int cp = cps.codePointAt(0);
+            final String extendedName = UCharacter.getExtendedName(cp);
+            addREsult(
+                    result,
+                    tdPlus,
+                    Utility.hex(cp, 2),
+                    "",
+                    // Locale.ROOT: character names must not be lowercased with locale rules
+                    extendedName == null ? "" : extendedName.toLowerCase(Locale.ROOT));
+        }
+        return result.append("</table>").toString();
+    }
+
+    public static void addREsult(
+            StringBuilder result,
+            String tdPlus,
+            final String id,
+            final String shortName,
+            final String description) {
+        result.append("<tr>")
+                .append(tdPlus)
+                .append(ESCAPE_START)
+                .append(id)
+                .append(ESCAPE_END + "</td>")
+                .append(tdPlus)
+                .append(shortName)
+                .append("</td>")
+                .append(tdPlus)
+                .append(description)
+                .append("</td></tr>"); // close the row (this used to open a second "<tr>")
+    }
 }
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/DateTimeFormats.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/DateTimeFormats.java
index 60d38bbdac8..87b368f9bfd 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/DateTimeFormats.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/DateTimeFormats.java
@@ -1,7 +1,6 @@
 package org.unicode.cldr.util;
 
 import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.ImmutableSet;
 import com.ibm.icu.impl.Row.R3;
 import com.ibm.icu.text.Bidi;
 import com.ibm.icu.text.DateFormat;
@@ -15,7 +14,6 @@
 import com.ibm.icu.text.MessageFormat;
 import com.ibm.icu.text.SimpleDateFormat;
 import com.ibm.icu.text.UnicodeSet;
-import com.ibm.icu.text.UnicodeSet.SpanCondition;
 import com.ibm.icu.util.Calendar;
 import com.ibm.icu.util.DateInterval;
 import com.ibm.icu.util.ICUUncheckedIOException;
@@ -25,6 +23,7 @@
 import java.io.File;
 import java.io.IOException;
 import java.io.PrintWriter;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Date;
 import java.util.EnumSet;
@@ -51,9 +50,6 @@ public class DateTimeFormats {
     private static final UnicodeSet TO_ESCAPE =
             new UnicodeSet(CodePointEscaper.FORCE_ESCAPE)
                     .remove(CodePointEscaper.SP.getCodePoint())
-                    .remove(CodePointEscaper.TSP.getCodePoint())
-                    .remove(CodePointEscaper.NBSP.getCodePoint())
-                    .remove(CodePointEscaper.NBTSP.getCodePoint())
                     .freeze();
     private static final String MISSING_PART = "ⓜⓘⓢⓢⓘⓝⓖ";
     private static final CLDRConfig CONFIG = CLDRConfig.getInstance();
@@ -513,12 +509,13 @@ public boolean isPresent(String skeleton) {
      * @param output
      */
     public void addTable(DateTimeFormats comparison, Appendable output) {
+        UnicodeSet allEscapedCharactersFound = new UnicodeSet();
         try {
             output.append(
                     "<h2>"
                             + hackDoubleLinked("Patterns")
                             + "</h2>"
-                            + "<p>Normally, there is one line containing an example in each Native Example cell. "
+                            + "<p>Normally, there is a single line containing an example in each Native Example cell. "
                             + (!isRTL
                                     ? ""
                                     : "However, two examples are provided if the locale is right-to-left, like Arabic or Hebrew, "
@@ -528,14 +525,13 @@ public void addTable(DateTimeFormats comparison, Appendable output) {
                                             + ltrSpan
                                             + "<i>and</i> a different background"
                                             + spanEnd
-                                            + ". If the display of either example causes strings of letters or numbers to collide, "
-                                            + "then a ⚠️ is shown. ")
+                                            + ". If the display of either example appears to cause strings of letters or numbers to collide, "
+                                            + "then a ⚠️ is shown followed by differences (this is still experimental). ")
                             + "When an example has hidden characters, then "
                             + tableSpan
                             + "an extra line"
                             + spanEnd
-                            + " shows those characters "
-                            + "such as ❰RLM❱ for the invisible Right-to-Left Mark. "
+                            + " shows those characters with short IDs ❰…❱: see the <b>Key</b> below the table. "
                             + "So that the ordering of the characters in memory is clear, they are presented left-to-right one at a time. "
                             + "so that the placement is clear. "
                             + "When a pattern (or a component of a pattern) is missing, it is displayed as "
@@ -572,8 +568,8 @@ public void addTable(DateTimeFormats comparison, Appendable output) {
                             RowStyle.normal,
                             name,
                             skeleton,
-                            comparison.getExample(skeleton),
-                            getExample(skeleton),
+                            comparison.getExample(skeleton, allEscapedCharactersFound),
+                            getExample(skeleton, allEscapedCharactersFound),
                             diff.isPresent(skeleton));
                 }
             }
@@ -611,12 +607,21 @@ public void addTable(DateTimeFormats comparison, Appendable output) {
                             RowStyle.normal,
                             skeleton,
                             skeleton,
-                            comparison.getExample(skeleton),
-                            getExample(skeleton),
+                            comparison.getExample(skeleton, allEscapedCharactersFound),
+                            getExample(skeleton, allEscapedCharactersFound),
                             true);
                 }
             }
             output.append("</table>");
+            if (!allEscapedCharactersFound.isEmpty()) {
+                output.append("\n<h3>Key to Escaped Characters</h3>\n");
+                String keyToEscaped =
+                        CodePointEscaper.getHtmlRows(
+                                allEscapedCharactersFound,
+                                " style='border:1px solid blue; border-collapse: collapse'",
+                                " style='border:1px solid blue'");
+                output.append(keyToEscaped);
+            }
         } catch (IOException e) {
             throw new ICUUncheckedIOException(e);
         }
@@ -626,9 +631,10 @@ public void addTable(DateTimeFormats comparison, Appendable output) {
      * Get an example from the "enhanced" skeleton.
      *
      * @param skeleton
+     * @param escapedCharactersFound Any characters that were escaped are added to this.
      * @return
      */
-    private String getExample(String skeleton) {
+    private String getExample(String skeleton, UnicodeSet escapedCharactersFound) {
         String example;
         if (skeleton.contains("®")) {
             example = getRelativeExampleFromSkeleton(skeleton);
@@ -663,18 +669,17 @@ private String getExample(String skeleton) {
             }
         }
         String transformedExample = TransliteratorUtilities.toHTML.transform(example);
+        ArrayList<String> listOfReorderings = new ArrayList<>();
         if ((isRTL || BIDI_MARKS.containsSome(example)) && !example.contains(MISSING_PART)) {
-            Bidi bidiLTR = new Bidi(example, Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT);
-            String orderedLTR = bidiLTR.writeReordered(0);
-            Bidi bidiRTL = new Bidi(example, Bidi.DIRECTION_RIGHT_TO_LEFT);
-            String orderedRTL = bidiRTL.writeReordered(0);
-            if (!orderedLTR.equals(orderedRTL)) {
-                // since this is RTL, we put it first
+            if (!BidiUtils.isOrderingUnchanged(
+                    example,
+                    listOfReorderings,
+                    Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT,
+                    Bidi.DIRECTION_RIGHT_TO_LEFT)) {
+                // since this locale is RTL, we put it first
                 String rtlVersion = rtlStart + transformedExample + divEnd; // not colored
                 String autoVersion = autoLtrStart + transformedExample + divEnd; // colored
-                Set<String> fieldsLTR = getFields(orderedLTR);
-                Set<String> fieldsRTL = getFields(orderedRTL);
-                String alert = fieldsLTR.equals(fieldsRTL) ? "" : " ⚠️ ";
+                String alert = BidiUtils.getAlert(listOfReorderings);
                 transformedExample = rtlVersion + autoVersion + alert;
             } else {
                 String autoVersion = autoStart + transformedExample + divEnd; // not colored
@@ -696,46 +701,11 @@ private String getExample(String skeleton) {
                             });
 
             transformedExample += "<table " + tableStyle + "><tr>" + processed + "</tr></table>";
+            escapedCharactersFound.addAll(new UnicodeSet().addAll(example).retainAll(TO_ESCAPE));
         }
         return transformedExample;
     }
 
-    /**
-     * Return a list of the fields, where each span is a sequence of:
-     *
-     * <ul>
-     *   <li>numbers (\p{N})
-     *   <li>letters & marks ([\p{L}\p{M}
-     *   <li>Other
-     * </ul>
-     *
-     * @param orderedLTR
-     * @return
-     */
-    static final UnicodeSet NUMBERS = new UnicodeSet("\\p{N}").freeze();
-
-    static final UnicodeSet LETTERS_MARKS = new UnicodeSet("[\\p{L}\\p{M}]").freeze();
-    static final UnicodeSet OTHERS =
-            new UnicodeSet(NUMBERS).addAll(LETTERS_MARKS).complement().freeze();
-    static final Set<UnicodeSet> ALL = ImmutableSet.of(NUMBERS, LETTERS_MARKS, OTHERS);
-
-    private Set<String> getFields(String ordered) {
-        Set<String> result =
-                new LinkedHashSet<>(); // doesn't have to be a LHS, but helps with debugging
-        int start = 0;
-        while (start < ordered.length()) {
-            for (UnicodeSet us : ALL) {
-                int end = us.span(ordered, start, SpanCondition.CONTAINED);
-                if (end != start) {
-                    result.add(ordered.substring(start, end));
-                    start = end;
-                    break;
-                }
-            }
-        }
-        return result;
-    }
-
     static final Pattern RELATIVE_DATE =
             PatternCache.get("®([a-z]+(?:-[a-z]+)?)+(-[a-z]+)?([+-]?\\d+)([a-zA-Z]+)?");
 
@@ -1102,11 +1072,14 @@ public static void main(String[] args) throws IOException {
 
         String organization = MyOptions.organization.option.getValue();
         String filter = MyOptions.filter.option.getValue();
+        boolean hasFilter = MyOptions.filter.option.doesOccur();
 
         CLDRFile englishFile = CONFIG.getEnglish();
 
         Factory factory = Factory.make(CLDRPaths.MAIN_DIRECTORY, filter);
-        System.out.println("Total locales: " + factory.getAvailableLanguages().size());
+        final Set<String> availableLocales =
+                hasFilter ? factory.getAvailable() : factory.getAvailableLanguages();
+        System.out.println("Total locales: " + availableLocales.size());
         DateTimeFormats english = new DateTimeFormats().set(englishFile, "gregorian");
 
         new File(DIR).mkdirs();
@@ -1118,7 +1091,7 @@ public static void main(String[] args) throws IOException {
         Map<String, String> sorted = new TreeMap<>();
         SupplementalDataInfo sdi = SupplementalDataInfo.getInstance();
         Set<String> defaultContent = sdi.getDefaultContentLocales();
-        for (String localeID : factory.getAvailable()) {
+        for (String localeID : availableLocales) {
             Level level = StandardCodes.make().getLocaleCoverageLevel(organization, localeID);
             if (Level.MODERN.compareTo(level) > 0) {
                 continue;