From b9c0541c4c3ca7207bb701c1ca8abc72b567f5d0 Mon Sep 17 00:00:00 2001
From: macchiati <mark@macchiato.com>
Date: Fri, 24 Nov 2023 14:30:24 -0800
Subject: [PATCH 1/6] Fix JSP failures with scx

---
 .../org/unicode/jsp/XPropertyFactory.java     | 22 +++++++-
 .../unicode/jsptest/TestScriptExtensions.java | 16 ++++++
 .../org/unicode/props/UnicodeProperty.java    | 51 +++++++++++++------
 3 files changed, 72 insertions(+), 17 deletions(-)
 create mode 100644 UnicodeJsps/src/test/java/org/unicode/jsptest/TestScriptExtensions.java

diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java b/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java
index e5c8268b9..4245781e4 100644
--- a/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java
+++ b/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java
@@ -96,6 +96,7 @@ public final Factory add2(UnicodeProperty sp) {
         add(
                 new CodepointTransformProperty(
                                 new Transform<Integer, String>() {
+                                    @Override
                                     public String transform(Integer source) {
                                         return Normalizer.normalize(source, Normalizer.NFC);
                                     }
@@ -105,6 +106,7 @@ public String transform(Integer source) {
         add(
                 new CodepointTransformProperty(
                                 new Transform<Integer, String>() {
+                                    @Override
                                     public String transform(Integer source) {
                                         return Normalizer.normalize(source, Normalizer.NFD);
                                     }
@@ -114,6 +116,7 @@ public String transform(Integer source) {
         add(
                 new CodepointTransformProperty(
                                 new Transform<Integer, String>() {
+                                    @Override
                                     public String transform(Integer source) {
                                         return Normalizer.normalize(source, Normalizer.NFKC);
                                     }
@@ -123,6 +126,7 @@ public String transform(Integer source) {
         add(
                 new CodepointTransformProperty(
                                 new Transform<Integer, String>() {
+                                    @Override
                                     public String transform(Integer source) {
                                         return Normalizer.normalize(source, Normalizer.NFKD);
                                     }
@@ -133,6 +137,7 @@ public String transform(Integer source) {
         add(
                 new StringTransformProperty(
                                 new StringTransform() {
+                                    @Override
                                     public String transform(String source) {
                                         return UCharacter.foldCase(source, true);
                                     }
@@ -142,6 +147,7 @@ public String transform(String source) {
         add(
                 new StringTransformProperty(
                                 new StringTransform() {
+                                    @Override
                                     public String transform(String source) {
                                         return UCharacter.toLowerCase(ULocale.ROOT, source);
                                     }
@@ -151,6 +157,7 @@ public String transform(String source) {
         add(
                 new StringTransformProperty(
                                 new StringTransform() {
+                                    @Override
                                     public String transform(String source) {
                                         return UCharacter.toUpperCase(ULocale.ROOT, source);
                                     }
@@ -160,6 +167,7 @@ public String transform(String source) {
         add(
                 new StringTransformProperty(
                                 new StringTransform() {
+                                    @Override
                                     public String transform(String source) {
                                         return UCharacter.toTitleCase(ULocale.ROOT, source, null);
                                     }
@@ -170,6 +178,7 @@ public String transform(String source) {
         add(
                 new StringTransformProperty(
                                 new StringTransform() {
+                                    @Override
                                     public String transform(String source) {
                                         StringBuilder b = new StringBuilder();
                                         for (int cp : CharSequences.codePoints(source)) {
@@ -184,6 +193,7 @@ public String transform(String source) {
         add(
                 new StringTransformProperty(
                                 new StringTransform() {
+                                    @Override
                                     public String transform(String source) {
                                         String result = NFM.nfm.get(source);
                                         return result == null ? source : result;
@@ -201,6 +211,7 @@ public String transform(String source) {
         add(
                 new CodepointTransformProperty(
                                 new Transform<Integer, String>() {
+                                    @Override
                                     public String transform(Integer source) {
                                         return UnicodeUtilities.getSubheader().getSubheader(source);
                                     }
@@ -251,7 +262,8 @@ public String transform(Integer source) {
                         .setMain("Script_Extensions", "scx", UnicodeProperty.ENUMERATED, "1.1")
                         .addValueAliases(
                                 ScriptTester.getScriptSpecialsAlternates(),
-                                AliasAddAction.IGNORE_IF_MISSING));
+                                AliasAddAction.IGNORE_IF_MISSING)
+                        .setMultivalued(true));
 
         CachedProps cp = CachedProps.CACHED_PROPS;
         for (String prop : cp.getAvailable()) {
@@ -652,6 +664,7 @@ public StringTransformProperty(
             setUniformUnassigned(hasUniformUnassigned);
         }
 
+        @Override
         protected String _getValue(int codepoint) {
             return transform.transform(UTF16.valueOf(codepoint));
         }
@@ -666,6 +679,7 @@ public CodepointTransformProperty(
             setUniformUnassigned(hasUniformUnassigned);
         }
 
+        @Override
         protected String _getValue(int codepoint) {
             return transform.transform(codepoint);
         }
@@ -682,6 +696,7 @@ public static class EncodingProperty extends SimpleProperty {
             encoder = new CharEncoder(charset, false, false);
         }
 
+        @Override
         protected String _getValue(int codepoint) {
             int len = encoder.getValue(codepoint, temp, 0);
             if (len < 0) {
@@ -697,6 +712,7 @@ protected String _getValue(int codepoint) {
             return result.toString();
         }
 
+        @Override
         public boolean isDefault(int codepoint) {
             int len = encoder.getValue(codepoint, temp, 0);
             return len < 0;
@@ -716,6 +732,7 @@ public static class EncodingPropertyBoolean extends SimpleProperty {
             encoder = new CharEncoder(charset, true, true);
         }
 
+        @Override
         protected String _getValue(int codepoint) {
             return (encoder.getValue(codepoint, null, 0) > 0) ? "Yes" : "No";
         }
@@ -731,6 +748,7 @@ public XPropertyFactory.UnicodeSetProperty set(UnicodeSet set) {
             return this;
         }
 
+        @Override
         protected UnicodeMap<String> _getUnicodeMap() {
             UnicodeMap<String> result = new UnicodeMap<String>();
             result.putAll(unicodeSet, "Yes");
@@ -743,10 +761,12 @@ public XPropertyFactory.UnicodeSetProperty set(String string) {
             return set(new UnicodeSet(string).freeze());
         }
 
+        @Override
         protected String _getValue(int codepoint) {
             return YESNO_ARRAY[unicodeSet.contains(codepoint) ? 0 : 1];
         }
 
+        @Override
         protected List _getAvailableValues(List result) {
             return YESNO;
         }
diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestScriptExtensions.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestScriptExtensions.java
new file mode 100644
index 000000000..503b90f29
--- /dev/null
+++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestScriptExtensions.java
@@ -0,0 +1,16 @@
+package org.unicode.jsptest;
+
+import com.ibm.icu.text.UnicodeSet;
+import org.junit.jupiter.api.Test;
+import org.unicode.jsp.UnicodeSetUtilities;
+import org.unicode.unittest.TestFmwkMinusMinus;
+
+public class TestScriptExtensions extends TestFmwkMinusMinus {
+    @Test
+    public void TestBasic() {
+        // As of 2023-11-24, scx was not working properly
+        String setA = "\\p{scx=deva}";
+        UnicodeSet deva = UnicodeSetUtilities.parseUnicodeSet(setA);
+        assertTrue(setA + "contains \\u1CD5", deva.contains(0x1cd5));
+    }
+}
diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
index 773e78f4e..71500c366 100644
--- a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
+++ b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
@@ -6,6 +6,7 @@
  */
 package org.unicode.props;
 
+import com.google.common.base.Splitter;
 import com.ibm.icu.dev.util.UnicodeMap;
 import com.ibm.icu.impl.Utility;
 import com.ibm.icu.text.SymbolTable;
@@ -32,6 +33,7 @@
 
 public abstract class UnicodeProperty extends UnicodeLabel {
 
+    private static final Splitter SPLIT_COMMAS = Splitter.on(",");
     public static final UnicodeSet NONCHARACTERS =
             new UnicodeSet("[:noncharactercodepoint:]").freeze();
     public static final UnicodeSet PRIVATE_USE = new UnicodeSet("[:gc=privateuse:]").freeze();
@@ -150,6 +152,13 @@ public static synchronized void ResetCacheProperties() {
     private Map<String, String> valueToFirstValueAlias = null;
 
     private boolean hasUniformUnassigned = true;
+    
+    private boolean isMultivalued = false;
+    
+    public UnicodeProperty setMultivalued(boolean value) {
+        isMultivalued = value;
+        return this;
+    }
 
     /*
      * Name: Unicode_1_Name Name: ISO_Comment Name: Name Name: Unicode_1_Name
@@ -309,7 +318,7 @@ public final String getValue(int codepoint, boolean getShortest) {
 
     public final String getFirstNameAlias() {
         if (firstNameAlias == null) {
-            firstNameAlias = (String) getNameAliases().get(0);
+            firstNameAlias = getNameAliases().get(0);
         }
         return firstNameAlias;
     }
@@ -407,13 +416,20 @@ public UnicodeSet getSet(PatternMatcher matcher, UnicodeSet result) {
         Iterator<String> it = um.getAvailableValues(null).iterator();
         main:
         while (it.hasNext()) {
-            String value = (String) it.next();
+            String value = it.next();
             temp.clear();
             Iterator<String> it2 = getValueAliases(value, temp).iterator();
             while (it2.hasNext()) {
-                String value2 = (String) it2.next();
+                String value2 = it2.next();
                 // System.out.println("Values:" + value2);
-                if (matcher.test(value2) || matcher.test(toSkeleton(value2))) {
+                if (isMultivalued && value2.contains(",")) {
+                   for (String part : SPLIT_COMMAS.split(value2)) {
+                       if (matcher.test(part) || matcher.test(toSkeleton(part))) {
+                           um.keySet(value, result);
+                           continue main;
+                       }
+                   }
+                } else if (matcher.test(value2) || matcher.test(toSkeleton(value2))) {
                     um.keySet(value, result);
                     continue main;
                 }
@@ -537,7 +553,7 @@ protected UnicodeMap<String> _getUnicodeMap() {
                 // if (DEBUG && i == 0x41) System.out.println(i + "\t" +
                 // getValue(i));
                 String value = getValue(i);
-                String resultValue = (String) result.getValue(i);
+                String resultValue = result.getValue(i);
                 if (!value.equals(resultValue)) {
                     throw new RuntimeException("Value failure at: " + Utility.hex(i));
                 }
@@ -760,13 +776,13 @@ public final Factory add(UnicodeProperty sp) {
             List<String> c = sp.getNameAliases(new ArrayList<>(1));
             Iterator<String> it = c.iterator();
             while (it.hasNext()) {
-                skeletonNames.put(toSkeleton((String) it.next()), sp);
+                skeletonNames.put(toSkeleton(it.next()), sp);
             }
             return this;
         }
 
         public UnicodeProperty getProperty(String propertyAlias) {
-            return (UnicodeProperty) skeletonNames.get(toSkeleton(propertyAlias));
+            return skeletonNames.get(toSkeleton(propertyAlias));
         }
 
         public final List<String> getAvailableNames() {
@@ -790,7 +806,7 @@ public final List<String> getAvailableNames(int propertyTypeMask, List<String> r
             if (result == null) result = new ArrayList<>(1);
             Iterator<String> it = canonicalNames.keySet().iterator();
             while (it.hasNext()) {
-                String item = (String) it.next();
+                String item = it.next();
                 UnicodeProperty property = getProperty(item);
                 if (DEBUG) System.out.println("Properties: " + item + "," + property.getType());
                 if (!property.isType(propertyTypeMask)) {
@@ -1008,11 +1024,13 @@ public UnicodeProperty setFilter(StringFilter filter) {
 
         List<String> temp = new ArrayList<>(1);
 
+        @Override
         public List<String> _getAvailableValues(List<String> result) {
             temp.clear();
             return filter.addUnique(property.getAvailableValues(temp), result);
         }
 
+        @Override
         public List<String> _getNameAliases(List<String> result) {
             temp.clear();
             return filter.addUnique(property.getNameAliases(temp), result);
@@ -1023,13 +1041,14 @@ public String _getValue(int codepoint) {
             return filter.remap(property.getValue(codepoint));
         }
 
+        @Override
         public List<String> _getValueAliases(String valueAlias, List<String> result) {
             if (backmap == null) {
                 backmap = new HashMap<>(1);
                 temp.clear();
                 Iterator<String> it = property.getAvailableValues(temp).iterator();
                 while (it.hasNext()) {
-                    String item = (String) it.next();
+                    String item = it.next();
                     String mappedItem = filter.remap(item);
                     if (backmap.get(mappedItem) != null && !allowValueAliasCollisions) {
                         throw new IllegalArgumentException(
@@ -1038,7 +1057,7 @@ public List<String> _getValueAliases(String valueAlias, List<String> result) {
                     backmap.put(mappedItem, item);
                 }
             }
-            valueAlias = (String) backmap.get(valueAlias);
+            valueAlias = backmap.get(valueAlias);
             temp.clear();
             return filter.addUnique(property.getValueAliases(valueAlias, temp), result);
         }
@@ -1065,7 +1084,7 @@ public final List<String> addUnique(Collection<String> source, List<String> resu
             if (result == null) result = new ArrayList<>(1);
             Iterator<String> it = source.iterator();
             while (it.hasNext()) {
-                UnicodeProperty.addUnique(remap((String) it.next()), result);
+                UnicodeProperty.addUnique(remap(it.next()), result);
             }
             return result;
         }
@@ -1305,7 +1324,7 @@ public SimpleProperty setValues(String[] valueAliases, String[] alternateValueAl
         public SimpleProperty setValues(List<String> valueAliases) {
             this.values = new LinkedHashSet<>(valueAliases);
             for (Iterator<String> it = this.values.iterator(); it.hasNext(); ) {
-                _addToValues((String) it.next(), null);
+                _addToValues(it.next(), null);
             }
             return this;
         }
@@ -1321,7 +1340,7 @@ protected void _fillValues() {
             List<String> newvalues =
                     getUnicodeMap_internal().getAvailableValues(new ArrayList<String>());
             for (Iterator<String> it = newvalues.iterator(); it.hasNext(); ) {
-                _addToValues((String) it.next(), null);
+                _addToValues(it.next(), null);
             }
         }
 
@@ -1380,7 +1399,7 @@ public UnicodeMapProperty set(UnicodeMap<String> map) {
 
         @Override
         protected String _getValue(int codepoint) {
-            return (String) unicodeMap.getValue(codepoint);
+            return unicodeMap.getValue(codepoint);
         }
 
         /* protected List _getValueAliases(String valueAlias, List result) {
@@ -1407,7 +1426,7 @@ public boolean isValidValue(String propertyValue) {
         if (isType(STRING_OR_MISC_MASK)) {
             return true;
         }
-        Collection<String> values = (Collection<String>) getAvailableValues();
+        Collection<String> values = getAvailableValues();
         for (String valueAlias : values) {
             if (UnicodeProperty.compareNames(valueAlias, propertyValue) == 0) {
                 return true;
@@ -1426,7 +1445,7 @@ public List<String> getValueAliases() {
         if (isType(STRING_OR_MISC_MASK)) {
             return result;
         }
-        Collection<String> values = (Collection<String>) getAvailableValues();
+        Collection<String> values = getAvailableValues();
         for (String valueAlias : values) {
             UnicodeProperty.addAllUnique(getValueAliases(valueAlias), result);
         }

From 74014266c3846b08361b8bc9588ee038ebd9e886 Mon Sep 17 00:00:00 2001
From: macchiati <mark@macchiato.com>
Date: Sat, 25 Nov 2023 07:44:55 -0800
Subject: [PATCH 2/6] Prevent deva,beng from working; run spotless

---
 .../unicode/jsptest/TestScriptExtensions.java | 31 ++++++++++++++++---
 .../org/unicode/jsptest/TestUnicodeSet.java   |  8 -----
 .../org/unicode/props/UnicodeProperty.java    | 29 ++++++++++-------
 3 files changed, 44 insertions(+), 24 deletions(-)

diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestScriptExtensions.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestScriptExtensions.java
index 503b90f29..c44850f63 100644
--- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestScriptExtensions.java
+++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestScriptExtensions.java
@@ -7,10 +7,33 @@
 
 public class TestScriptExtensions extends TestFmwkMinusMinus {
     @Test
-    public void TestBasic() {
+    public void TestScx1Script() {
         // As of 2023-11-24, scx was not working properly
-        String setA = "\\p{scx=deva}";
-        UnicodeSet deva = UnicodeSetUtilities.parseUnicodeSet(setA);
-        assertTrue(setA + "contains \\u1CD5", deva.contains(0x1cd5));
+        String unicodeSetString = "\\p{scx=deva}";
+        UnicodeSet parsed = UnicodeSetUtilities.parseUnicodeSet(unicodeSetString);
+
+        UnicodeSet mustContain = new UnicodeSet("[ᳵ।]"); // scx: Beng+Deva; Beng+Deva+Dogr+Gong+...
+        assertTrue(unicodeSetString + " contains " + mustContain, parsed.containsAll(mustContain));
+
+        UnicodeSet mustNotContain = new UnicodeSet("[ক]"); // one Bengali character
+        assertFalse(
+                unicodeSetString + " !contains " + mustNotContain,
+                parsed.containsAll(mustNotContain));
+    }
+
+    @Test
+    public void TestScxMulti() {
+        // Comma-separated value lists are rejected for multivalued properties such as scx
+        String unicodeSetString = "\\p{scx=beng,deva}";
+        String exceptionMessage = null;
+        try {
+            UnicodeSet parsed = UnicodeSetUtilities.parseUnicodeSet(unicodeSetString);
+        } catch (Exception e) {
+            exceptionMessage = e.getMessage();
+        }
+        assertEquals(
+                "Expected exception",
+                "Multivalued property values can't contain commas.",
+                exceptionMessage);
     }
 }
diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
index e05911654..d0b97a857 100644
--- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
+++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java
@@ -413,14 +413,6 @@ public void TestPerMill(final String name, final Charset charset) {
         }
     }
 
-    @Test
-    public void TestScriptSpecials() {
-        //        UnicodeSet set = UnicodeSetUtilities.parseUnicodeSet("[:scs=Hant:]");
-        //        assertNotEquals("Hant", 0, set.size());
-        UnicodeSet set2 = UnicodeSetUtilities.parseUnicodeSet("[:scx=Arab,Syrc:]");
-        assertNotEquals("Arab Syrc", 0, set2.size());
-    }
-
     @Test
     public void TestGC() {
         Map<String, R2<String, UnicodeSet>> SPECIAL_GC =
diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
index 71500c366..615986a7a 100644
--- a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
+++ b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
@@ -152,9 +152,9 @@ public static synchronized void ResetCacheProperties() {
     private Map<String, String> valueToFirstValueAlias = null;
 
     private boolean hasUniformUnassigned = true;
-    
+
     private boolean isMultivalued = false;
-    
+
     public UnicodeProperty setMultivalued(boolean value) {
         isMultivalued = value;
         return this;
@@ -387,10 +387,15 @@ public final UnicodeSet getSet(PatternMatcher matcher) {
      * the original contents.
      */
     public final UnicodeSet getSet(String propertyValue, UnicodeSet result) {
-        return getSet(
-                new SimpleMatcher(
-                        propertyValue, isType(STRING_OR_MISC_MASK) ? null : PROPERTY_COMPARATOR),
-                result);
+        if (isMultivalued && propertyValue.contains(",")) {
+            throw new IllegalArgumentException("Multivalued property values can't contain commas.");
+        } else {
+            return getSet(
+                    new SimpleMatcher(
+                            propertyValue,
+                            isType(STRING_OR_MISC_MASK) ? null : PROPERTY_COMPARATOR),
+                    result);
+        }
     }
 
     private UnicodeMap<String> unicodeMap = null;
@@ -423,12 +428,12 @@ public UnicodeSet getSet(PatternMatcher matcher, UnicodeSet result) {
                 String value2 = it2.next();
                 // System.out.println("Values:" + value2);
                 if (isMultivalued && value2.contains(",")) {
-                   for (String part : SPLIT_COMMAS.split(value2)) {
-                       if (matcher.test(part) || matcher.test(toSkeleton(part))) {
-                           um.keySet(value, result);
-                           continue main;
-                       }
-                   }
+                    for (String part : SPLIT_COMMAS.split(value2)) {
+                        if (matcher.test(part) || matcher.test(toSkeleton(part))) {
+                            um.keySet(value, result);
+                            continue main;
+                        }
+                    }
                 } else if (matcher.test(value2) || matcher.test(toSkeleton(value2))) {
                     um.keySet(value, result);
                     continue main;

From 8cea65328b93083b1a6ff0ebadc2ccb2ba9cc2d6 Mon Sep 17 00:00:00 2001
From: macchiati <mark@macchiato.com>
Date: Sat, 25 Nov 2023 15:56:47 -0800
Subject: [PATCH 3/6] Add exemplars as second example

---
 .../org/unicode/jsp/XPropertyFactory.java     | 95 +++++++++++++++++++
 ...ptExtensions.java => TestMultivalued.java} | 16 +++-
 2 files changed, 110 insertions(+), 1 deletion(-)
 rename UnicodeJsps/src/test/java/org/unicode/jsptest/{TestScriptExtensions.java => TestMultivalued.java} (69%)

diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java b/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java
index 4245781e4..676242d7e 100644
--- a/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java
+++ b/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java
@@ -1,6 +1,10 @@
 package org.unicode.jsp;
 
+import com.google.common.base.Joiner;
+import com.google.common.collect.Multimap;
+import com.google.common.collect.TreeMultimap;
 import com.ibm.icu.dev.util.UnicodeMap;
+import com.ibm.icu.dev.util.UnicodeMap.EntryRange;
 import com.ibm.icu.lang.CharSequences;
 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.lang.UProperty.NameChoice;
@@ -12,13 +16,19 @@
 import com.ibm.icu.text.Transform;
 import com.ibm.icu.text.UTF16;
 import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.text.UnicodeSetIterator;
+import com.ibm.icu.util.LocaleData;
 import com.ibm.icu.util.ULocale;
 import com.ibm.icu.util.VersionInfo;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.List;
 import java.util.Locale;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.TreeSet;
 import org.unicode.idna.Idna.IdnaType;
 import org.unicode.idna.Idna2003;
 import org.unicode.idna.Idna2008;
@@ -28,9 +38,13 @@
 import org.unicode.props.UnicodeProperty.BaseProperty;
 import org.unicode.props.UnicodeProperty.Factory;
 import org.unicode.props.UnicodeProperty.SimpleProperty;
+import org.unicode.text.utility.Utility;
 
 public class XPropertyFactory extends UnicodeProperty.Factory {
 
+    private static final Joiner JOIN_COMMAS = Joiner.on(",");
+    private static final boolean DEBUG_MULTI = false;
+
     static final UnicodeSet ALL =
             new UnicodeSet("[[:^C:][:Cc:][:Cf:][:noncharactercodepoint:]]").freeze();
 
@@ -250,6 +264,9 @@ public String transform(Integer source) {
                         .setMain("bmp", "bmp", UnicodeProperty.BINARY, "6.0"));
 
         addCollationProperty();
+        addExamplarProperty(LocaleData.ES_STANDARD, "exem", "exemplar");
+        addExamplarProperty(LocaleData.ES_AUXILIARY, "exema", "exemplar_aux");
+        addExamplarProperty(LocaleData.ES_PUNCTUATION, "exemp", "exemplar_punct");
 
         // set up the special script property
         UnicodeProperty scriptProp = base.getProperty("sc");
@@ -301,6 +318,84 @@ public String transform(Integer source) {
                         .setMain("RGI_Emoji", "RGI_Emoji", UnicodeProperty.BINARY, "13.0"));
     }
 
+    private void addExamplarProperty(
+            int exemplarType, String propertyAbbreviation, String propertyName) {
+        Multimap<Integer, String> data = TreeMultimap.create();
+        Set<String> localeSet = new TreeSet<>();
+
+        for (ULocale ulocale : ULocale.getAvailableLocales()) {
+            if (!ulocale.getCountry().isEmpty()) {
+                continue;
+                // Ideally we would skip only locales whose characters are in the parent locale,
+                // but there is no ULocale.getParent(), so all regional locales are skipped.
+            }
+            UnicodeSet exemplarSet = LocaleData.getExemplarSet(ulocale, 0, exemplarType);
+            if (!ulocale.getScript().isEmpty()) {
+                // we can't find out the parent locale or defaultContent locale in ICU, so we hack
+                // it
+                String langLocale = ulocale.getLanguage();
+                UnicodeSet langExemplarSet =
+                        LocaleData.getExemplarSet(new ULocale(langLocale), 0, exemplarType);
+                if (langExemplarSet.equals(exemplarSet)) {
+                    continue;
+                }
+            }
+            String locale = ulocale.toString();
+            localeSet.add(locale);
+            for (UnicodeSetIterator it = new UnicodeSetIterator(exemplarSet); it.nextRange(); ) {
+                if (it.codepoint == UnicodeSetIterator.IS_STRING) {
+                    // flatten
+                    int cp = 0;
+                    for (int i = 0; i < it.string.length(); i += Character.charCount(cp)) {
+                        cp = it.string.codePointAt(i);
+                        data.put(cp, locale);
+                    }
+                } else {
+                    for (int cp = it.codepoint; cp <= it.codepointEnd; ++cp) {
+                        data.put(cp, locale);
+                    }
+                }
+            }
+        }
+
+        // convert to UnicodeMap
+        UnicodeMap<String> unicodeMap = new UnicodeMap<>();
+        for (Entry<Integer, Collection<String>> entry : data.asMap().entrySet()) {
+            String value = JOIN_COMMAS.join(entry.getValue()).intern();
+            unicodeMap.put(entry.getKey(), value);
+        }
+        if (DEBUG_MULTI) {
+            System.out.println("\n" + propertyName);
+            for (EntryRange<String> entry : unicodeMap.entryRanges()) {
+                System.out.println(
+                        Utility.hex(entry.codepoint)
+                                + (entry.codepoint == entry.codepointEnd
+                                        ? ""
+                                        : "-" + Utility.hex(entry.codepointEnd))
+                                + " ;\t"
+                                + entry.value);
+            }
+        }
+
+        // put locales into right format
+        String[] localeList = localeSet.toArray(new String[localeSet.size()]);
+        String[][] locales = new String[][] {localeList, localeList}; // abbreviations are the same
+
+
+        add(
+                new UnicodeProperty.UnicodeMapProperty()
+                        .set(unicodeMap)
+                        .setMain(
+                                propertyName,
+                                propertyAbbreviation,
+                                UnicodeProperty.ENUMERATED,
+                                "1.1")
+                        .addValueAliases(
+                                locales,
+                                AliasAddAction.ADD_MAIN_ALIAS)
+                        .setMultivalued(true));
+    }
+
     private void addCollationProperty() {
         RuleBasedCollator c = UnicodeSetUtilities.RAW_COLLATOR;
         // (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);
diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestScriptExtensions.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java
similarity index 69%
rename from UnicodeJsps/src/test/java/org/unicode/jsptest/TestScriptExtensions.java
rename to UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java
index c44850f63..5c0104e1e 100644
--- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestScriptExtensions.java
+++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java
@@ -5,7 +5,7 @@
 import org.unicode.jsp.UnicodeSetUtilities;
 import org.unicode.unittest.TestFmwkMinusMinus;
 
-public class TestScriptExtensions extends TestFmwkMinusMinus {
+public class TestMultivalued extends TestFmwkMinusMinus {
     @Test
     public void TestScx1Script() {
         // As of 2023-11-24, scx was not working properly
@@ -36,4 +36,18 @@ public void TestScxMulti() {
                 "Multivalued property values can't contain commas.",
                 exceptionMessage);
     }
+    
+    @Test
+    public void TestExemplars() {
+        String unicodeSetString = "\\p{exem=da}";
+        UnicodeSet parsed = UnicodeSetUtilities.parseUnicodeSet(unicodeSetString);
+        
+        UnicodeSet mustContain = new UnicodeSet("[æ]");
+        assertTrue(unicodeSetString + " contains " + mustContain, parsed.containsAll(mustContain));
+
+        UnicodeSet mustNotContain = new UnicodeSet("[ç]");
+        assertFalse(
+                unicodeSetString + " !contains " + mustNotContain,
+                parsed.containsAll(mustNotContain));
+    }
 }

From e1bec956f0f03e8035f2bd1f394fa73b577ff304 Mon Sep 17 00:00:00 2001
From: macchiati <mark@macchiato.com>
Date: Sat, 25 Nov 2023 16:03:56 -0800
Subject: [PATCH 4/6] Spotless

---
 .../src/main/java/org/unicode/jsp/XPropertyFactory.java      | 5 +----
 .../src/test/java/org/unicode/jsptest/TestMultivalued.java   | 4 ++--
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java b/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java
index 676242d7e..9c91bdf2f 100644
--- a/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java
+++ b/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java
@@ -381,7 +381,6 @@ private void addExamplarProperty(
         String[] localeList = localeSet.toArray(new String[localeSet.size()]);
         String[][] locales = new String[][] {localeList, localeList}; // abbreviations are the same
 
-
         add(
                 new UnicodeProperty.UnicodeMapProperty()
                         .set(unicodeMap)
@@ -390,9 +389,7 @@ private void addExamplarProperty(
                                 propertyAbbreviation,
                                 UnicodeProperty.ENUMERATED,
                                 "1.1")
-                        .addValueAliases(
-                                locales,
-                                AliasAddAction.ADD_MAIN_ALIAS)
+                        .addValueAliases(locales, AliasAddAction.ADD_MAIN_ALIAS)
                         .setMultivalued(true));
     }
 
diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java
index 5c0104e1e..e4f531da3 100644
--- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java
+++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java
@@ -36,12 +36,12 @@ public void TestScxMulti() {
                 "Multivalued property values can't contain commas.",
                 exceptionMessage);
     }
-    
+
     @Test
     public void TestExemplars() {
         String unicodeSetString = "\\p{exem=da}";
         UnicodeSet parsed = UnicodeSetUtilities.parseUnicodeSet(unicodeSetString);
-        
+
         UnicodeSet mustContain = new UnicodeSet("[æ]");
         assertTrue(unicodeSetString + " contains " + mustContain, parsed.containsAll(mustContain));
 

From 5d6fc6315bc8ba88c7d17a43286602b23fd95fb7 Mon Sep 17 00:00:00 2001
From: macchiati <mark@macchiato.com>
Date: Mon, 27 Nov 2023 15:24:34 -0800
Subject: [PATCH 5/6] Fixes for Markus's review

---
 .../src/main/java/org/unicode/jsp/XPropertyFactory.java    | 7 +++----
 .../src/test/java/org/unicode/jsptest/TestMultivalued.java | 2 --
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java b/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java
index 9c91bdf2f..b7232c295 100644
--- a/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java
+++ b/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java
@@ -4,7 +4,6 @@
 import com.google.common.collect.Multimap;
 import com.google.common.collect.TreeMultimap;
 import com.ibm.icu.dev.util.UnicodeMap;
-import com.ibm.icu.dev.util.UnicodeMap.EntryRange;
 import com.ibm.icu.lang.CharSequences;
 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.lang.UProperty.NameChoice;
@@ -324,7 +323,7 @@ private void addExamplarProperty(
         Set<String> localeSet = new TreeSet<>();
 
         for (ULocale ulocale : ULocale.getAvailableLocales()) {
-            if (!ulocale.getCountry().isEmpty()) {
+            if (!ulocale.getCountry().isEmpty() || !ulocale.getVariant().isEmpty()) {
                 continue;
                 // we want to skip cases where characters are in the parent locale, but there is no
                 // ULocale parentLocale = ulocale.getParent();
@@ -340,7 +339,7 @@ private void addExamplarProperty(
                     continue;
                 }
             }
-            String locale = ulocale.toString();
+            String locale = ulocale.toLanguageTag();
             localeSet.add(locale);
             for (UnicodeSetIterator it = new UnicodeSetIterator(exemplarSet); it.nextRange(); ) {
                 if (it.codepoint == UnicodeSetIterator.IS_STRING) {
@@ -366,7 +365,7 @@ private void addExamplarProperty(
         }
         if (DEBUG_MULTI) {
             System.out.println("\n" + propertyName);
-            for (EntryRange<String> entry : unicodeMap.entryRanges()) {
+            for (UnicodeMap.EntryRange<String> entry : unicodeMap.entryRanges()) {
                 System.out.println(
                         Utility.hex(entry.codepoint)
                                 + (entry.codepoint == entry.codepointEnd
diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java
index e4f531da3..f5c4373b0 100644
--- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java
+++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java
@@ -8,7 +8,6 @@
 public class TestMultivalued extends TestFmwkMinusMinus {
     @Test
     public void TestScx1Script() {
-        // As of 2023-11-24, scx was not working properly
         String unicodeSetString = "\\p{scx=deva}";
         UnicodeSet parsed = UnicodeSetUtilities.parseUnicodeSet(unicodeSetString);
 
@@ -23,7 +22,6 @@ public void TestScx1Script() {
 
     @Test
     public void TestScxMulti() {
-        // As of 2023-11-24, scx was not working properly
         String unicodeSetString = "\\p{scx=beng,deva}";
         String exceptionMessage = null;
         try {

From 2d69f506254c07fd28e646e4c4087a078eeb4912 Mon Sep 17 00:00:00 2001
From: macchiati <mark@macchiato.com>
Date: Mon, 27 Nov 2023 15:26:23 -0800
Subject: [PATCH 6/6] Fix Bangla comment also

---
 .../src/test/java/org/unicode/jsptest/TestMultivalued.java      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java
index f5c4373b0..8ed9706ef 100644
--- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java
+++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java
@@ -14,7 +14,7 @@ public void TestScx1Script() {
         UnicodeSet mustContain = new UnicodeSet("[ᳵ।]"); // one character B&D, other B&D&D&G&...
         assertTrue(unicodeSetString + " contains " + mustContain, parsed.containsAll(mustContain));
 
-        UnicodeSet mustNotContain = new UnicodeSet("[ক]"); // one Bengali character
+        UnicodeSet mustNotContain = new UnicodeSet("[ক]"); // one Bangla character
         assertFalse(
                 unicodeSetString + " !contains " + mustNotContain,
                 parsed.containsAll(mustNotContain));