From b9c0541c4c3ca7207bb701c1ca8abc72b567f5d0 Mon Sep 17 00:00:00 2001 From: macchiati Date: Fri, 24 Nov 2023 14:30:24 -0800 Subject: [PATCH 1/6] Fix JSP failures with scx --- .../org/unicode/jsp/XPropertyFactory.java | 22 +++++++- .../unicode/jsptest/TestScriptExtensions.java | 16 ++++++ .../org/unicode/props/UnicodeProperty.java | 51 +++++++++++++------ 3 files changed, 72 insertions(+), 17 deletions(-) create mode 100644 UnicodeJsps/src/test/java/org/unicode/jsptest/TestScriptExtensions.java diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java b/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java index e5c8268b9..4245781e4 100644 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java +++ b/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java @@ -96,6 +96,7 @@ public final Factory add2(UnicodeProperty sp) { add( new CodepointTransformProperty( new Transform() { + @Override public String transform(Integer source) { return Normalizer.normalize(source, Normalizer.NFC); } @@ -105,6 +106,7 @@ public String transform(Integer source) { add( new CodepointTransformProperty( new Transform() { + @Override public String transform(Integer source) { return Normalizer.normalize(source, Normalizer.NFD); } @@ -114,6 +116,7 @@ public String transform(Integer source) { add( new CodepointTransformProperty( new Transform() { + @Override public String transform(Integer source) { return Normalizer.normalize(source, Normalizer.NFKC); } @@ -123,6 +126,7 @@ public String transform(Integer source) { add( new CodepointTransformProperty( new Transform() { + @Override public String transform(Integer source) { return Normalizer.normalize(source, Normalizer.NFKD); } @@ -133,6 +137,7 @@ public String transform(Integer source) { add( new StringTransformProperty( new StringTransform() { + @Override public String transform(String source) { return UCharacter.foldCase(source, true); } @@ -142,6 +147,7 @@ public String transform(String source) { add( new StringTransformProperty( new StringTransform() { + @Override public String transform(String source) { return UCharacter.toLowerCase(ULocale.ROOT, source); } @@ -151,6 +157,7 @@ public String transform(String source) { add( new StringTransformProperty( new StringTransform() { + @Override public String transform(String source) { return UCharacter.toUpperCase(ULocale.ROOT, source); } @@ -160,6 +167,7 @@ public String transform(String source) { add( new StringTransformProperty( new StringTransform() { + @Override public String transform(String source) { return UCharacter.toTitleCase(ULocale.ROOT, source, null); } @@ -170,6 +178,7 @@ public String transform(String source) { add( new StringTransformProperty( new StringTransform() { + @Override public String transform(String source) { StringBuilder b = new StringBuilder(); for (int cp : CharSequences.codePoints(source)) { @@ -184,6 +193,7 @@ public String transform(String source) { add( new StringTransformProperty( new StringTransform() { + @Override public String transform(String source) { String result = NFM.nfm.get(source); return result == null ? source : result; @@ -201,6 +211,7 @@ public String transform(String source) { add( new CodepointTransformProperty( new Transform() { + @Override public String transform(Integer source) { return UnicodeUtilities.getSubheader().getSubheader(source); } @@ -251,7 +262,8 @@ public String transform(Integer source) { .setMain("Script_Extensions", "scx", UnicodeProperty.ENUMERATED, "1.1") .addValueAliases( ScriptTester.getScriptSpecialsAlternates(), - AliasAddAction.IGNORE_IF_MISSING)); + AliasAddAction.IGNORE_IF_MISSING) + .setMultivalued(true)); CachedProps cp = CachedProps.CACHED_PROPS; for (String prop : cp.getAvailable()) { @@ -652,6 +664,7 @@ public StringTransformProperty( setUniformUnassigned(hasUniformUnassigned); } + @Override protected String _getValue(int codepoint) { return transform.transform(UTF16.valueOf(codepoint)); } @@ -666,6 +679,7 @@ public CodepointTransformProperty( setUniformUnassigned(hasUniformUnassigned); } + @Override protected String _getValue(int codepoint) { return transform.transform(codepoint); } @@ -682,6 +696,7 @@ public static class EncodingProperty extends SimpleProperty { encoder = new CharEncoder(charset, false, false); } + @Override protected String _getValue(int codepoint) { int len = encoder.getValue(codepoint, temp, 0); if (len < 0) { @@ -697,6 +712,7 @@ protected String _getValue(int codepoint) { return result.toString(); } + @Override public boolean isDefault(int codepoint) { int len = encoder.getValue(codepoint, temp, 0); return len < 0; @@ -716,6 +732,7 @@ public static class EncodingPropertyBoolean extends SimpleProperty { encoder = new CharEncoder(charset, true, true); } + @Override protected String _getValue(int codepoint) { return (encoder.getValue(codepoint, null, 0) > 0) ? "Yes" : "No"; } @@ -731,6 +748,7 @@ public XPropertyFactory.UnicodeSetProperty set(UnicodeSet set) { return this; } + @Override protected UnicodeMap _getUnicodeMap() { UnicodeMap result = new UnicodeMap(); result.putAll(unicodeSet, "Yes"); @@ -743,10 +761,12 @@ public XPropertyFactory.UnicodeSetProperty set(String string) { return set(new UnicodeSet(string).freeze()); } + @Override protected String _getValue(int codepoint) { return YESNO_ARRAY[unicodeSet.contains(codepoint) ? 0 : 1]; } + @Override protected List _getAvailableValues(List result) { return YESNO; } diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestScriptExtensions.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestScriptExtensions.java new file mode 100644 index 000000000..503b90f29 --- /dev/null +++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestScriptExtensions.java @@ -0,0 +1,16 @@ +package org.unicode.jsptest; + +import com.ibm.icu.text.UnicodeSet; +import org.junit.jupiter.api.Test; +import org.unicode.jsp.UnicodeSetUtilities; +import org.unicode.unittest.TestFmwkMinusMinus; + +public class TestScriptExtensions extends TestFmwkMinusMinus { + @Test + public void TestBasic() { + // As of 2023-11-24, scx was not working properly + String setA = "\\p{scx=deva}"; + UnicodeSet deva = UnicodeSetUtilities.parseUnicodeSet(setA); + assertTrue(setA + "contains \\u1CD5", deva.contains(0x1cd5)); + } +} diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java index 773e78f4e..71500c366 100644 --- a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java +++ b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java @@ -6,6 +6,7 @@ */ package org.unicode.props; +import com.google.common.base.Splitter; import com.ibm.icu.dev.util.UnicodeMap; import com.ibm.icu.impl.Utility; import com.ibm.icu.text.SymbolTable; @@ -32,6 +33,7 @@ public abstract class UnicodeProperty extends UnicodeLabel { + private static final Splitter SPLIT_COMMAS = Splitter.on(","); public static final UnicodeSet NONCHARACTERS = new UnicodeSet("[:noncharactercodepoint:]").freeze(); public static final UnicodeSet PRIVATE_USE = new UnicodeSet("[:gc=privateuse:]").freeze(); @@ -150,6 +152,13 @@ public static synchronized void ResetCacheProperties() { private Map valueToFirstValueAlias = null; private boolean hasUniformUnassigned = true; + + private boolean isMultivalued = false; + + public UnicodeProperty setMultivalued(boolean value) { + isMultivalued = value; + return this; + } /* * Name: Unicode_1_Name Name: ISO_Comment Name: Name Name: Unicode_1_Name @@ -309,7 +318,7 @@ public final String getValue(int codepoint, boolean getShortest) { public final String getFirstNameAlias() { if (firstNameAlias == null) { - firstNameAlias = (String) getNameAliases().get(0); + firstNameAlias = getNameAliases().get(0); } return firstNameAlias; } @@ -407,13 +416,20 @@ public UnicodeSet getSet(PatternMatcher matcher, UnicodeSet result) { Iterator it = um.getAvailableValues(null).iterator(); main: while (it.hasNext()) { - String value = (String) it.next(); + String value = it.next(); temp.clear(); Iterator it2 = getValueAliases(value, temp).iterator(); while (it2.hasNext()) { - String value2 = (String) it2.next(); + String value2 = it2.next(); // System.out.println("Values:" + value2); - if (matcher.test(value2) || matcher.test(toSkeleton(value2))) { + if (isMultivalued && value2.contains(",")) { + for (String part : SPLIT_COMMAS.split(value2)) { + if (matcher.test(part) || matcher.test(toSkeleton(part))) { + um.keySet(value, result); + continue main; + } + } + } else if (matcher.test(value2) || matcher.test(toSkeleton(value2))) { um.keySet(value, result); continue main; } @@ -537,7 +553,7 @@ protected UnicodeMap _getUnicodeMap() { // if (DEBUG && i == 0x41) System.out.println(i + "\t" + // getValue(i)); String value = getValue(i); - String resultValue = (String) result.getValue(i); + String resultValue = result.getValue(i); if (!value.equals(resultValue)) { throw new RuntimeException("Value failure at: " + Utility.hex(i)); } @@ -760,13 +776,13 @@ public final Factory add(UnicodeProperty sp) { List c = sp.getNameAliases(new ArrayList<>(1)); Iterator it = c.iterator(); while (it.hasNext()) { - skeletonNames.put(toSkeleton((String) it.next()), sp); + skeletonNames.put(toSkeleton(it.next()), sp); } return this; } public UnicodeProperty getProperty(String propertyAlias) { - return (UnicodeProperty) skeletonNames.get(toSkeleton(propertyAlias)); + return skeletonNames.get(toSkeleton(propertyAlias)); } public final List getAvailableNames() { @@ -790,7 +806,7 @@ public final List getAvailableNames(int propertyTypeMask, List r if (result == null) result = new ArrayList<>(1); Iterator it = canonicalNames.keySet().iterator(); while (it.hasNext()) { - String item = (String) it.next(); + String item = it.next(); UnicodeProperty property = getProperty(item); if (DEBUG) System.out.println("Properties: " + item + "," + property.getType()); if (!property.isType(propertyTypeMask)) { @@ -1008,11 +1024,13 @@ public UnicodeProperty setFilter(StringFilter filter) { List temp = new ArrayList<>(1); + @Override public List _getAvailableValues(List result) { temp.clear(); return filter.addUnique(property.getAvailableValues(temp), result); } + @Override public List _getNameAliases(List result) { temp.clear(); return filter.addUnique(property.getNameAliases(temp), result); @@ -1023,13 +1041,14 @@ public String _getValue(int codepoint) { return filter.remap(property.getValue(codepoint)); } + @Override public List _getValueAliases(String valueAlias, List result) { if (backmap == null) { backmap = new HashMap<>(1); temp.clear(); Iterator it = property.getAvailableValues(temp).iterator(); while (it.hasNext()) { - String item = (String) it.next(); + String item = it.next(); String mappedItem = filter.remap(item); if (backmap.get(mappedItem) != null && !allowValueAliasCollisions) { throw new IllegalArgumentException( @@ -1038,7 +1057,7 @@ public List _getValueAliases(String valueAlias, List result) { backmap.put(mappedItem, item); } } - valueAlias = (String) backmap.get(valueAlias); + valueAlias = backmap.get(valueAlias); temp.clear(); return filter.addUnique(property.getValueAliases(valueAlias, temp), result); } @@ -1065,7 +1084,7 @@ public final List addUnique(Collection source, List resu if (result == null) result = new ArrayList<>(1); Iterator it = source.iterator(); while (it.hasNext()) { - UnicodeProperty.addUnique(remap((String) it.next()), result); + UnicodeProperty.addUnique(remap(it.next()), result); } return result; } @@ -1305,7 +1324,7 @@ public SimpleProperty setValues(String[] valueAliases, String[] alternateValueAl public SimpleProperty setValues(List valueAliases) { this.values = new LinkedHashSet<>(valueAliases); for (Iterator it = this.values.iterator(); it.hasNext(); ) { - _addToValues((String) it.next(), null); + _addToValues(it.next(), null); } return this; } @@ -1321,7 +1340,7 @@ protected void _fillValues() { List newvalues = getUnicodeMap_internal().getAvailableValues(new ArrayList()); for (Iterator it = newvalues.iterator(); it.hasNext(); ) { - _addToValues((String) it.next(), null); + _addToValues(it.next(), null); } } @@ -1380,7 +1399,7 @@ public UnicodeMapProperty set(UnicodeMap map) { @Override protected String _getValue(int codepoint) { - return (String) unicodeMap.getValue(codepoint); + return unicodeMap.getValue(codepoint); } /* protected List _getValueAliases(String valueAlias, List result) { @@ -1407,7 +1426,7 @@ public boolean isValidValue(String propertyValue) { if (isType(STRING_OR_MISC_MASK)) { return true; } - Collection values = (Collection) getAvailableValues(); + Collection values = getAvailableValues(); for (String valueAlias : values) { if (UnicodeProperty.compareNames(valueAlias, propertyValue) == 0) { return true; @@ -1426,7 +1445,7 @@ public List getValueAliases() { if (isType(STRING_OR_MISC_MASK)) { return result; } - Collection values = (Collection) getAvailableValues(); + Collection values = getAvailableValues(); for (String valueAlias : values) { UnicodeProperty.addAllUnique(getValueAliases(valueAlias), result); } From 74014266c3846b08361b8bc9588ee038ebd9e886 Mon Sep 17 00:00:00 2001 From: macchiati Date: Sat, 25 Nov 2023 07:44:55 -0800 Subject: [PATCH 2/6] Prevent deva,beng from working; run spotless --- .../unicode/jsptest/TestScriptExtensions.java | 31 ++++++++++++++++--- .../org/unicode/jsptest/TestUnicodeSet.java | 8 ----- .../org/unicode/props/UnicodeProperty.java | 29 ++++++++++------- 3 files changed, 44 insertions(+), 24 deletions(-) diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestScriptExtensions.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestScriptExtensions.java index 503b90f29..c44850f63 100644 --- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestScriptExtensions.java +++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestScriptExtensions.java @@ -7,10 +7,33 @@ public class TestScriptExtensions extends TestFmwkMinusMinus { @Test - public void TestBasic() { + public void TestScx1Script() { // As of 2023-11-24, scx was not working properly - String setA = "\\p{scx=deva}"; - UnicodeSet deva = UnicodeSetUtilities.parseUnicodeSet(setA); - assertTrue(setA + "contains \\u1CD5", deva.contains(0x1cd5)); + String unicodeSetString = "\\p{scx=deva}"; + UnicodeSet parsed = UnicodeSetUtilities.parseUnicodeSet(unicodeSetString); + + UnicodeSet mustContain = new UnicodeSet("[ᳵ।]"); // one character B&D, other B&D&D&G&... + assertTrue(unicodeSetString + " contains " + mustContain, parsed.containsAll(mustContain)); + + UnicodeSet mustNotContain = new UnicodeSet("[ক]"); // one Bengali character + assertFalse( + unicodeSetString + " !contains " + mustNotContain, + parsed.containsAll(mustNotContain)); + } + + @Test + public void TestScxMulti() { + // As of 2023-11-24, scx was not working properly + String unicodeSetString = "\\p{scx=beng,deva}"; + String exceptionMessage = null; + try { + UnicodeSet parsed = UnicodeSetUtilities.parseUnicodeSet(unicodeSetString); + } catch (Exception e) { + exceptionMessage = e.getMessage(); + } + assertEquals( + "Expected exception", + "Multivalued property values can't contain commas.", + exceptionMessage); } } diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java index e05911654..d0b97a857 100644 --- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java +++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java @@ -413,14 +413,6 @@ public void TestPerMill(final String name, final Charset charset) { } } - @Test - public void TestScriptSpecials() { - // UnicodeSet set = UnicodeSetUtilities.parseUnicodeSet("[:scs=Hant:]"); - // assertNotEquals("Hant", 0, set.size()); - UnicodeSet set2 = UnicodeSetUtilities.parseUnicodeSet("[:scx=Arab,Syrc:]"); - assertNotEquals("Arab Syrc", 0, set2.size()); - } - @Test public void TestGC() { Map> SPECIAL_GC = diff --git a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java index 71500c366..615986a7a 100644 --- a/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java +++ b/unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java @@ -152,9 +152,9 @@ public static synchronized void ResetCacheProperties() { private Map valueToFirstValueAlias = null; private boolean hasUniformUnassigned = true; - + private boolean isMultivalued = false; - + public UnicodeProperty setMultivalued(boolean value) { isMultivalued = value; return this; @@ -387,10 +387,15 @@ public final UnicodeSet getSet(PatternMatcher matcher) { * the original contents. */ public final UnicodeSet getSet(String propertyValue, UnicodeSet result) { - return getSet( - new SimpleMatcher( - propertyValue, isType(STRING_OR_MISC_MASK) ? null : PROPERTY_COMPARATOR), - result); + if (isMultivalued && propertyValue.contains(",")) { + throw new IllegalArgumentException("Multivalued property values can't contain commas."); + } else { + return getSet( + new SimpleMatcher( + propertyValue, + isType(STRING_OR_MISC_MASK) ? null : PROPERTY_COMPARATOR), + result); + } } private UnicodeMap unicodeMap = null; @@ -423,12 +428,12 @@ public UnicodeSet getSet(PatternMatcher matcher, UnicodeSet result) { String value2 = it2.next(); // System.out.println("Values:" + value2); if (isMultivalued && value2.contains(",")) { - for (String part : SPLIT_COMMAS.split(value2)) { - if (matcher.test(part) || matcher.test(toSkeleton(part))) { - um.keySet(value, result); - continue main; - } - } + for (String part : SPLIT_COMMAS.split(value2)) { + if (matcher.test(part) || matcher.test(toSkeleton(part))) { + um.keySet(value, result); + continue main; + } + } } else if (matcher.test(value2) || matcher.test(toSkeleton(value2))) { um.keySet(value, result); continue main; From 8cea65328b93083b1a6ff0ebadc2ccb2ba9cc2d6 Mon Sep 17 00:00:00 2001 From: macchiati Date: Sat, 25 Nov 2023 15:56:47 -0800 Subject: [PATCH 3/6] Add exemplars as second example --- .../org/unicode/jsp/XPropertyFactory.java | 95 +++++++++++++++++++ ...ptExtensions.java => TestMultivalued.java} | 16 +++- 2 files changed, 110 insertions(+), 1 deletion(-) rename UnicodeJsps/src/test/java/org/unicode/jsptest/{TestScriptExtensions.java => TestMultivalued.java} (69%) diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java b/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java index 4245781e4..676242d7e 100644 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java +++ b/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java @@ -1,6 +1,10 @@ package org.unicode.jsp; +import com.google.common.base.Joiner; +import com.google.common.collect.Multimap; +import com.google.common.collect.TreeMultimap; import com.ibm.icu.dev.util.UnicodeMap; +import com.ibm.icu.dev.util.UnicodeMap.EntryRange; import com.ibm.icu.lang.CharSequences; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.lang.UProperty.NameChoice; @@ -12,13 +16,19 @@ import com.ibm.icu.text.Transform; import com.ibm.icu.text.UTF16; import com.ibm.icu.text.UnicodeSet; +import com.ibm.icu.text.UnicodeSetIterator; +import com.ibm.icu.util.LocaleData; import com.ibm.icu.util.ULocale; import com.ibm.icu.util.VersionInfo; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.List; import java.util.Locale; +import java.util.Map.Entry; +import java.util.Set; +import java.util.TreeSet; import org.unicode.idna.Idna.IdnaType; import org.unicode.idna.Idna2003; import org.unicode.idna.Idna2008; @@ -28,9 +38,13 @@ import org.unicode.props.UnicodeProperty.BaseProperty; import org.unicode.props.UnicodeProperty.Factory; import org.unicode.props.UnicodeProperty.SimpleProperty; +import org.unicode.text.utility.Utility; public class XPropertyFactory extends UnicodeProperty.Factory { + private static final Joiner JOIN_COMMAS = Joiner.on(","); + private static final boolean DEBUG_MULTI = false; + static final UnicodeSet ALL = new UnicodeSet("[[:^C:][:Cc:][:Cf:][:noncharactercodepoint:]]").freeze(); @@ -250,6 +264,9 @@ public String transform(Integer source) { .setMain("bmp", "bmp", UnicodeProperty.BINARY, "6.0")); addCollationProperty(); + addExamplarProperty(LocaleData.ES_STANDARD, "exem", "exemplar"); + addExamplarProperty(LocaleData.ES_AUXILIARY, "exema", "exemplar_aux"); + addExamplarProperty(LocaleData.ES_PUNCTUATION, "exemp", "exemplar_punct"); // set up the special script property UnicodeProperty scriptProp = base.getProperty("sc"); @@ -301,6 +318,84 @@ public String transform(Integer source) { .setMain("RGI_Emoji", "RGI_Emoji", UnicodeProperty.BINARY, "13.0")); } + private void addExamplarProperty( + int exemplarType, String propertyAbbreviation, String propertyName) { + Multimap data = TreeMultimap.create(); + Set localeSet = new TreeSet<>(); + + for (ULocale ulocale : ULocale.getAvailableLocales()) { + if (!ulocale.getCountry().isEmpty()) { + continue; + // we want to skip cases where characters are in the parent locale, but there is no + // ULocale parentLocale = ulocale.getParent(); + } + UnicodeSet exemplarSet = LocaleData.getExemplarSet(ulocale, 0, exemplarType); + if (!ulocale.getScript().isEmpty()) { + // we can't find out the parent locale or defaultContent locale in ICU, so we hack + // it + String langLocale = ulocale.getLanguage(); + UnicodeSet langExemplarSet = + LocaleData.getExemplarSet(new ULocale(langLocale), 0, exemplarType); + if (langExemplarSet.equals(exemplarSet)) { + continue; + } + } + String locale = ulocale.toString(); + localeSet.add(locale); + for (UnicodeSetIterator it = new UnicodeSetIterator(exemplarSet); it.nextRange(); ) { + if (it.codepoint == UnicodeSetIterator.IS_STRING) { + // flatten + int cp = 0; + for (int i = 0; i < it.string.length(); i += Character.charCount(cp)) { + cp = it.string.codePointAt(i); + data.put(cp, locale); + } + } else { + for (int cp = it.codepoint; cp <= it.codepointEnd; ++cp) { + data.put(cp, locale); + } + } + } + } + + // convert to UnicodeMap + UnicodeMap unicodeMap = new UnicodeMap<>(); + for (Entry> entry : data.asMap().entrySet()) { + String value = JOIN_COMMAS.join(entry.getValue()).intern(); + unicodeMap.put(entry.getKey(), value); + } + if (DEBUG_MULTI) { + System.out.println("\n" + propertyName); + for (EntryRange entry : unicodeMap.entryRanges()) { + System.out.println( + Utility.hex(entry.codepoint) + + (entry.codepoint == entry.codepointEnd + ? "" + : "-" + Utility.hex(entry.codepointEnd)) + + " ;\t" + + entry.value); + } + } + + // put locales into right format + String[] localeList = localeSet.toArray(new String[localeSet.size()]); + String[][] locales = new String[][] {localeList, localeList}; // abbreviations are the same + + + add( + new UnicodeProperty.UnicodeMapProperty() + .set(unicodeMap) + .setMain( + propertyName, + propertyAbbreviation, + UnicodeProperty.ENUMERATED, + "1.1") + .addValueAliases( + locales, + AliasAddAction.ADD_MAIN_ALIAS) + .setMultivalued(true)); + } + private void addCollationProperty() { RuleBasedCollator c = UnicodeSetUtilities.RAW_COLLATOR; // (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestScriptExtensions.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java similarity index 69% rename from UnicodeJsps/src/test/java/org/unicode/jsptest/TestScriptExtensions.java rename to UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java index c44850f63..5c0104e1e 100644 --- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestScriptExtensions.java +++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java @@ -5,7 +5,7 @@ import org.unicode.jsp.UnicodeSetUtilities; import org.unicode.unittest.TestFmwkMinusMinus; -public class TestScriptExtensions extends TestFmwkMinusMinus { +public class TestMultivalued extends TestFmwkMinusMinus { @Test public void TestScx1Script() { // As of 2023-11-24, scx was not working properly @@ -36,4 +36,18 @@ public void TestScxMulti() { "Multivalued property values can't contain commas.", exceptionMessage); } + + @Test + public void TestExemplars() { + String unicodeSetString = "\\p{exem=da}"; + UnicodeSet parsed = UnicodeSetUtilities.parseUnicodeSet(unicodeSetString); + + UnicodeSet mustContain = new UnicodeSet("[æ]"); + assertTrue(unicodeSetString + " contains " + mustContain, parsed.containsAll(mustContain)); + + UnicodeSet mustNotContain = new UnicodeSet("[ç]"); + assertFalse( + unicodeSetString + " !contains " + mustNotContain, + parsed.containsAll(mustNotContain)); + } } From e1bec956f0f03e8035f2bd1f394fa73b577ff304 Mon Sep 17 00:00:00 2001 From: macchiati Date: Sat, 25 Nov 2023 16:03:56 -0800 Subject: [PATCH 4/6] Spotless --- .../src/main/java/org/unicode/jsp/XPropertyFactory.java | 5 +---- .../src/test/java/org/unicode/jsptest/TestMultivalued.java | 4 ++-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java b/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java index 676242d7e..9c91bdf2f 100644 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java +++ b/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java @@ -381,7 +381,6 @@ private void addExamplarProperty( String[] localeList = localeSet.toArray(new String[localeSet.size()]); String[][] locales = new String[][] {localeList, localeList}; // abbreviations are the same - add( new UnicodeProperty.UnicodeMapProperty() .set(unicodeMap) @@ -390,9 +389,7 @@ private void addExamplarProperty( propertyAbbreviation, UnicodeProperty.ENUMERATED, "1.1") - .addValueAliases( - locales, - AliasAddAction.ADD_MAIN_ALIAS) + .addValueAliases(locales, AliasAddAction.ADD_MAIN_ALIAS) .setMultivalued(true)); } diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java index 5c0104e1e..e4f531da3 100644 --- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java +++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java @@ -36,12 +36,12 @@ public void TestScxMulti() { "Multivalued property values can't contain commas.", exceptionMessage); } - + @Test public void TestExemplars() { String unicodeSetString = "\\p{exem=da}"; UnicodeSet parsed = UnicodeSetUtilities.parseUnicodeSet(unicodeSetString); - + UnicodeSet mustContain = new UnicodeSet("[æ]"); assertTrue(unicodeSetString + " contains " + mustContain, parsed.containsAll(mustContain)); From 5d6fc6315bc8ba88c7d17a43286602b23fd95fb7 Mon Sep 17 00:00:00 2001 From: macchiati Date: Mon, 27 Nov 2023 15:24:34 -0800 Subject: [PATCH 5/6] Fixes for Markus's review --- .../src/main/java/org/unicode/jsp/XPropertyFactory.java | 7 +++---- .../src/test/java/org/unicode/jsptest/TestMultivalued.java | 2 -- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java b/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java index 9c91bdf2f..b7232c295 100644 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java +++ b/UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java @@ -4,7 +4,6 @@ import com.google.common.collect.Multimap; import com.google.common.collect.TreeMultimap; import com.ibm.icu.dev.util.UnicodeMap; -import com.ibm.icu.dev.util.UnicodeMap.EntryRange; import com.ibm.icu.lang.CharSequences; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.lang.UProperty.NameChoice; @@ -324,7 +323,7 @@ private void addExamplarProperty( Set localeSet = new TreeSet<>(); for (ULocale ulocale : ULocale.getAvailableLocales()) { - if (!ulocale.getCountry().isEmpty()) { + if (!ulocale.getCountry().isEmpty() || !ulocale.getVariant().isEmpty()) { continue; // we want to skip cases where characters are in the parent locale, but there is no // ULocale parentLocale = ulocale.getParent(); @@ -340,7 +339,7 @@ private void addExamplarProperty( continue; } } - String locale = ulocale.toString(); + String locale = ulocale.toLanguageTag(); localeSet.add(locale); for (UnicodeSetIterator it = new UnicodeSetIterator(exemplarSet); it.nextRange(); ) { if (it.codepoint == UnicodeSetIterator.IS_STRING) { @@ -366,7 +365,7 @@ private void addExamplarProperty( } if (DEBUG_MULTI) { System.out.println("\n" + propertyName); - for (EntryRange entry : unicodeMap.entryRanges()) { + for (UnicodeMap.EntryRange entry : unicodeMap.entryRanges()) { System.out.println( Utility.hex(entry.codepoint) + (entry.codepoint == entry.codepointEnd diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java index e4f531da3..f5c4373b0 100644 --- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java +++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java @@ -8,7 +8,6 @@ public class TestMultivalued extends TestFmwkMinusMinus { @Test public void TestScx1Script() { - // As of 2023-11-24, scx was not working properly String unicodeSetString = "\\p{scx=deva}"; UnicodeSet parsed = UnicodeSetUtilities.parseUnicodeSet(unicodeSetString); @@ -23,7 +22,6 @@ public void TestScx1Script() { @Test public void TestScxMulti() { - // As of 2023-11-24, scx was not working properly String unicodeSetString = "\\p{scx=beng,deva}"; String exceptionMessage = null; try { From 2d69f506254c07fd28e646e4c4087a078eeb4912 Mon Sep 17 00:00:00 2001 From: macchiati Date: Mon, 27 Nov 2023 15:26:23 -0800 Subject: [PATCH 6/6] Fix Bangla comment also --- .../src/test/java/org/unicode/jsptest/TestMultivalued.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java index f5c4373b0..8ed9706ef 100644 --- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java +++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestMultivalued.java @@ -14,7 +14,7 @@ public void TestScx1Script() { UnicodeSet mustContain = new UnicodeSet("[ᳵ।]"); // one character B&D, other B&D&D&G&... assertTrue(unicodeSetString + " contains " + mustContain, parsed.containsAll(mustContain)); - UnicodeSet mustNotContain = new UnicodeSet("[ক]"); // one Bengali character + UnicodeSet mustNotContain = new UnicodeSet("[ক]"); // one Bangla character assertFalse( unicodeSetString + " !contains " + mustNotContain, parsed.containsAll(mustNotContain));