diff --git a/UnicodeJsps/pom.xml b/UnicodeJsps/pom.xml index 83d01106f..e09b537c6 100644 --- a/UnicodeJsps/pom.xml +++ b/UnicodeJsps/pom.xml @@ -29,7 +29,7 @@ com.ibm.icu - icu4j-for-cldr + icu4j diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/Confusables.java b/UnicodeJsps/src/main/java/org/unicode/jsp/Confusables.java index 3f6f16a9c..61d284935 100644 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/Confusables.java +++ b/UnicodeJsps/src/main/java/org/unicode/jsp/Confusables.java @@ -1,6 +1,5 @@ package org.unicode.jsp; -import com.ibm.icu.dev.util.CollectionUtilities; import com.ibm.icu.dev.util.UnicodeMap; import com.ibm.icu.impl.Utility; import com.ibm.icu.text.Normalizer; @@ -41,7 +40,7 @@ public static UnicodeMap getMap() { UnicodeMap result = new UnicodeMap(); for (String s : equivalents) { Set others = new TreeSet(equivalents.getEquivalences(s)); - String list = "\u2051" + CollectionUtilities.join(others, "\u2051") + "\u2051"; + String list = "\u2051" + String.join("\u2051", others) + "\u2051"; for (String other : others) { result.put(other, list); } diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/ScriptTester.java b/UnicodeJsps/src/main/java/org/unicode/jsp/ScriptTester.java index f5dd6d9e6..7d3a191d4 100644 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/ScriptTester.java +++ b/UnicodeJsps/src/main/java/org/unicode/jsp/ScriptTester.java @@ -1,6 +1,5 @@ package org.unicode.jsp; -import com.ibm.icu.dev.util.CollectionUtilities; import com.ibm.icu.dev.util.UnicodeMap; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.lang.UProperty; @@ -391,7 +390,7 @@ public static String getNames( for (int i = value.nextSetBit(0); i >= 0; i = value.nextSetBit(i + 1)) { names.add(ScriptTester.getScriptName(i, choice)); } - return CollectionUtilities.join(names, separator).toString(); + return String.join(separator, names); } } diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestProperties.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestProperties.java index c9a810386..59b98a53b 100644 --- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestProperties.java +++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestProperties.java @@ -287,7 +287,7 @@ public void TestCCC() { XPropertyFactory factory = XPropertyFactory.make(); checkProperty(factory, "ccc"); - String test = "[:ccc=/3/:]"; + String test = "[:Udev:ccc=/3/:]"; UnicodeSet actual = UnicodeSetUtilities.parseUnicodeSet(test); UnicodeSet expected = new UnicodeSet(); for (int i = 0; i < 256; ++i) { diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java index e18292a9e..a68acbc10 100644 --- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java +++ b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestUnicodeSet.java @@ -24,6 +24,7 @@ import java.util.SortedMap; import java.util.TreeSet; import java.util.stream.Stream; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.condition.EnabledIf; import org.junit.jupiter.params.ParameterizedTest; @@ -375,6 +376,7 @@ public void TestGC() { } @Test + @Disabled("Stop using ICU for properties: version skew") public void TestNF() { for (String nf : new String[] {"d", "c", "kd", "kc"}) { checkSetsEqual("[:isnf" + nf + ":]", "[:nf" + nf + "qc!=N:]"); diff --git a/pom.xml b/pom.xml index ccc7030f7..88cbc1ea6 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ For ICU versions, see https://github.com/orgs/unicode-org/packages?repo_name=icu Note that we can't use the general ICU maven packages, because utilities isn't exported (yet). --> - 72.0.1-SNAPSHOT-cldr-2022-08-17 + 76.0.1-SNAPSHOT com.ibm.icu - icu4j-for-cldr + icu4j ${icu.version} diff --git a/unicodetools-testutils/pom.xml b/unicodetools-testutils/pom.xml index bbb94d70b..11289fdf3 100644 --- a/unicodetools-testutils/pom.xml +++ b/unicodetools-testutils/pom.xml @@ -18,7 +18,7 @@ com.ibm.icu - icu4j-for-cldr + icu4j com.ibm.icu diff --git a/unicodetools/pom.xml b/unicodetools/pom.xml index 286fe393b..5a03813a4 100644 --- a/unicodetools/pom.xml +++ b/unicodetools/pom.xml @@ -18,7 +18,7 @@ com.ibm.icu - icu4j-for-cldr + icu4j diff --git a/unicodetools/src/main/java/com/ibm/icu/dev/util/CollectionUtilities.java b/unicodetools/src/main/java/com/ibm/icu/dev/util/CollectionUtilities.java new file mode 100644 index 000000000..79ff7a633 --- /dev/null +++ b/unicodetools/src/main/java/com/ibm/icu/dev/util/CollectionUtilities.java @@ -0,0 +1,731 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************* + * Copyright (C) 1996-2015, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.dev.util; + +// This file was migrated from the ICU4J repo, +// path icu4j/main/framework/src/test/java/com/ibm/icu/dev/util/CollectionUtilities.java + +import com.ibm.icu.text.UTF16; +import com.ibm.icu.text.UnicodeSet; +import com.ibm.icu.text.UnicodeSetIterator; +import java.util.Collection; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.regex.Matcher; + +/** + * Utilities that ought to be on collections, but aren't + * + * @internal CLDR + */ +public final class CollectionUtilities { + + /** + * Join an array of items. + * + * @param + * @param array + * @param separator + * @return string + */ + public static String join(T[] array, String separator) { + StringBuffer result = new StringBuffer(); + for (int i = 0; i < array.length; ++i) { + if (i != 0) result.append(separator); + result.append(array[i]); + } + return result.toString(); + } + + /** + * Join a collection of items. + * + * @param + * @param collection + * @param + * @param array + * @param separator + * @return string + */ + public static > String join(U collection, String separator) { + StringBuffer result = new StringBuffer(); + boolean first = true; + for (Iterator it = collection.iterator(); it.hasNext(); ) { + if (first) first = false; + else result.append(separator); + result.append(it.next()); + } + return result.toString(); + } + + /** + * Utility like Arrays.asList() + * + * @param source + * @param target + * @param reverse + * @param + * @return + */ + public static Map asMap(T[][] source, Map target, boolean reverse) { + int from = 0, to = 1; + if (reverse) { + from = 1; + to = 0; + } + for (int i = 0; i < source.length; ++i) { + target.put(source[i][from], source[i][to]); + } + return target; + } + + /** + * Add all items in iterator to target collection + * + * @param + * @param + * @param source + * @param target + * @return + */ + public static > U addAll(Iterator source, U target) { + while (source.hasNext()) { + target.add(source.next()); + } + return target; // for chaining + } + + /** + * Get the size of an iterator (number of items in it). + * + * @param source + * @return + */ + public static int size(Iterator source) { + int result = 0; + while (source.hasNext()) { + source.next(); + ++result; + } + return result; + } + + /** + * @param + * @param source + * @return + */ + public static Map asMap(T[][] source) { + return asMap(source, new HashMap(), false); + } + + /** + * Utility that ought to be on Map + * + * @param m + * @param itemsToRemove + * @param + * @param + * @return map passed in + */ + public static Map removeAll(Map m, Collection itemsToRemove) { + for (Iterator it = itemsToRemove.iterator(); it.hasNext(); ) { + Object item = it.next(); + m.remove(item); + } + return m; + } + + /** + * Get first item in collection, or null if there is none. + * + * @param + * @param + * @param c + * @return first item + */ + public > T getFirst(U c) { + Iterator it = c.iterator(); + if (!it.hasNext()) return null; + return it.next(); + } + + /** + * Get the "best" in collection. That is the least if direction is < 0, otherwise the greatest. + * The first is chosen if there are multiples. + * + * @param + * @param + * @param c + * @param comp + * @param direction + * @return + */ + public static > T getBest(U c, Comparator comp, int direction) { + Iterator it = c.iterator(); + if (!it.hasNext()) return null; + T bestSoFar = it.next(); + if (direction < 0) { + while (it.hasNext()) { + T item = it.next(); + int compValue = comp.compare(item, bestSoFar); + if (compValue < 0) { + bestSoFar = item; + } + } + } else { + while (it.hasNext()) { + T item = it.next(); + int compValue = comp.compare(item, bestSoFar); + if (compValue > 0) { + bestSoFar = item; + } + } + } + return bestSoFar; + } + + /** + * Matches item. + * + * @param + */ + public interface ObjectMatcher { + /** + * Must handle null, never throw exception + * + * @param o + * @return + */ + boolean matches(T o); + } + + /** + * Reverse a match + * + * @param + */ + public static class InverseMatcher implements ObjectMatcher { + ObjectMatcher other; + /** + * @param toInverse + * @return + */ + public ObjectMatcher set(ObjectMatcher toInverse) { + other = toInverse; + return this; + } + + public boolean matches(T value) { + return !other.matches(value); + } + } + + /** + * Remove matching items + * + * @param + * @param + * @param c + * @param f + * @return + */ + public static > U removeAll(U c, ObjectMatcher f) { + for (Iterator it = c.iterator(); it.hasNext(); ) { + T item = it.next(); + if (f.matches(item)) it.remove(); + } + return c; + } + + /** + * Retain matching items + * + * @param + * @param + * @param c + * @param f + * @return + */ + public static > U retainAll(U c, ObjectMatcher f) { + for (Iterator it = c.iterator(); it.hasNext(); ) { + T item = it.next(); + if (!f.matches(item)) it.remove(); + } + return c; + } + + /** + * @param a + * @param b + * @return + */ + public static boolean containsSome(Collection a, Collection b) { + // fast paths + if (a.size() == 0 || b.size() == 0) return false; + if (a == b) return true; // must test after size test. + + if (a instanceof SortedSet && b instanceof SortedSet) { + SortedSet aa = (SortedSet) a; + SortedSet bb = (SortedSet) b; + Comparator bbc = bb.comparator(); + Comparator aac = aa.comparator(); + if (bbc == null && aac == null) { + Iterator ai = aa.iterator(); + Iterator bi = bb.iterator(); + Comparable ao = (Comparable) ai.next(); // these are ok, since the sizes are != 0 + Comparable bo = (Comparable) bi.next(); + while (true) { + int rel = ao.compareTo(bo); + if (rel < 0) { + if (!ai.hasNext()) return false; + ao = (Comparable) ai.next(); + } else if (rel > 0) { + if (!bi.hasNext()) return false; + bo = (Comparable) bi.next(); + } else { + return true; + } + } + } else if (bbc.equals(a)) { + Iterator ai = aa.iterator(); + Iterator bi = bb.iterator(); + Object ao = ai.next(); // these are ok, since the sizes are != 0 + Object bo = bi.next(); + while (true) { + int rel = aac.compare(ao, bo); + if (rel < 0) { + if (!ai.hasNext()) return false; + ao = ai.next(); + } else if (rel > 0) { + if (!bi.hasNext()) return false; + bo = bi.next(); + } else { + return true; + } + } + } + } + for (Iterator it = a.iterator(); it.hasNext(); ) { + if (b.contains(it.next())) return true; + } + return false; + } + + public static boolean containsAll(Collection a, Collection b) { + // fast paths + if (a == b) return true; + if (b.size() == 0) return true; + if (a.size() < b.size()) return false; + + if (a instanceof SortedSet && b instanceof SortedSet) { + SortedSet aa = (SortedSet) a; + SortedSet bb = (SortedSet) b; + Comparator bbc = bb.comparator(); + Comparator aac = aa.comparator(); + if (bbc == null && aac == null) { + Iterator ai = aa.iterator(); + Iterator bi = bb.iterator(); + Comparable ao = (Comparable) ai.next(); // these are ok, since the sizes are != 0 + Comparable bo = (Comparable) bi.next(); + while (true) { + int rel = ao.compareTo(bo); + if (rel == 0) { + if (!bi.hasNext()) return true; + if (!ai.hasNext()) return false; + bo = (Comparable) bi.next(); + ao = (Comparable) ai.next(); + } else if (rel < 0) { + if (!ai.hasNext()) return false; + ao = (Comparable) ai.next(); + } else { + return false; + } + } + } else if (bbc.equals(aac)) { + Iterator ai = aa.iterator(); + Iterator bi = bb.iterator(); + Object ao = ai.next(); // these are ok, since the sizes are != 0 + Object bo = bi.next(); + while (true) { + int rel = aac.compare(ao, bo); + if (rel == 0) { + if (!bi.hasNext()) return true; + if (!ai.hasNext()) return false; + bo = bi.next(); + ao = ai.next(); + } else if (rel < 0) { + if (!ai.hasNext()) return false; + ao = ai.next(); + } else { + return false; + } + } + } + } + return a.containsAll(b); + } + + public static boolean containsNone(Collection a, Collection b) { + return !containsSome(a, b); + } + + /** Used for results of getContainmentRelation */ + public static final int ALL_EMPTY = 0, + NOT_A_SUPERSET_B = 1, + NOT_A_DISJOINT_B = 2, + NOT_A_SUBSET_B = 4, + NOT_A_EQUALS_B = NOT_A_SUBSET_B | NOT_A_SUPERSET_B, + A_PROPER_SUBSET_OF_B = NOT_A_DISJOINT_B | NOT_A_SUPERSET_B, + A_PROPER_SUPERSET_B = NOT_A_SUBSET_B | NOT_A_DISJOINT_B, + A_PROPER_OVERLAPS_B = NOT_A_SUBSET_B | NOT_A_DISJOINT_B | NOT_A_SUPERSET_B; + + /** + * Assesses all the possible containment relations between collections A and B with one call. + *
+ * Returns an int with bits set, according to a "Venn Diagram" view of A vs B.
+ * NOT_A_SUPERSET_B: a - b != {}
+ * NOT_A_DISJOINT_B: a * b != {} // * is intersects
+ * NOT_A_SUBSET_B: b - a != {}
+ * Thus the bits can be used to get the following relations:
+ * for A_SUPERSET_B, use (x & CollectionUtilities.NOT_A_SUPERSET_B) == 0
+ * for A_SUBSET_B, use (x & CollectionUtilities.NOT_A_SUBSET_B) == 0
+ * for A_EQUALS_B, use (x & CollectionUtilities.NOT_A_EQUALS_B) == 0
+ * for A_DISJOINT_B, use (x & CollectionUtilities.NOT_A_DISJOINT_B) == 0
+ * for A_OVERLAPS_B, use (x & CollectionUtilities.NOT_A_DISJOINT_B) != 0
+ */ + public static int getContainmentRelation(Collection a, Collection b) { + if (a.size() == 0) { + return (b.size() == 0) ? ALL_EMPTY : NOT_A_SUPERSET_B; + } else if (b.size() == 0) { + return NOT_A_SUBSET_B; + } + int result = 0; + // WARNING: one might think that the following can be short-circuited, by looking at + // the sizes of a and b. However, this would fail in general, where a different comparator + // is being + // used in the two collections. Unfortunately, there is no failsafe way to test for that. + for (Iterator it = a.iterator(); result != 6 && it.hasNext(); ) { + result |= (b.contains(it.next())) ? NOT_A_DISJOINT_B : NOT_A_SUBSET_B; + } + for (Iterator it = b.iterator(); (result & 3) != 3 && it.hasNext(); ) { + result |= (a.contains(it.next())) ? NOT_A_DISJOINT_B : NOT_A_SUPERSET_B; + } + return result; + } + + public static String remove(String source, UnicodeSet removals) { + StringBuffer result = new StringBuffer(); + int cp; + for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) { + cp = UTF16.charAt(source, i); + if (!removals.contains(cp)) UTF16.append(result, cp); + } + return result.toString(); + } + + /** + * Does one string contain another, starting at a specific offset? + * + * @param text + * @param offset + * @param other + * @return + */ + public static int matchesAt(CharSequence text, int offset, CharSequence other) { + int len = other.length(); + int i = 0; + int j = offset; + for (; i < len; ++i, ++j) { + char pc = other.charAt(i); + char tc = text.charAt(j); + if (pc != tc) return -1; + } + return i; + } + + /** + * Returns the ending offset found by matching characters with testSet, until a position is + * found that doen't match + * + * @param string + * @param offset + * @param testSet + * @return + */ + public int span(CharSequence string, int offset, UnicodeSet testSet) { + while (true) { + int newOffset = testSet.matchesAt(string, offset); + if (newOffset < 0) return offset; + } + } + + /** + * Returns the ending offset found by matching characters with testSet, until a position is + * found that does match + * + * @param string + * @param offset + * @param testSet + * @return + */ + public int spanNot(CharSequence string, int offset, UnicodeSet testSet) { + while (true) { + int newOffset = testSet.matchesAt(string, offset); + if (newOffset >= 0) return offset; + ++offset; // try next character position + // we don't have to worry about surrogates for this. + } + } + + /** + * Modifies Unicode set to flatten the strings. Eg [abc{da}] => [abcd] Returns the set for + * chaining. + * + * @param exemplar1 + * @return + */ + public static UnicodeSet flatten(UnicodeSet exemplar1) { + UnicodeSet result = new UnicodeSet(); + boolean gotString = false; + for (UnicodeSetIterator it = new UnicodeSetIterator(exemplar1); it.nextRange(); ) { + if (it.codepoint == UnicodeSetIterator.IS_STRING) { + result.addAll(it.string); + gotString = true; + } else { + result.add(it.codepoint, it.codepointEnd); + } + } + if (gotString) exemplar1.set(result); + return exemplar1; + } + + /** For producing filtered iterators */ + public abstract static class FilteredIterator implements Iterator { + private Iterator baseIterator; + private static final Object EMPTY = new Object(); + private static final Object DONE = new Object(); + private Object nextObject = EMPTY; + + public FilteredIterator set(Iterator baseIterator) { + this.baseIterator = baseIterator; + return this; + } + + public void remove() { + throw new UnsupportedOperationException("Doesn't support removal"); + } + + public Object next() { + Object result = nextObject; + nextObject = EMPTY; + return result; + } + + public boolean hasNext() { + if (nextObject == DONE) return false; + if (nextObject != EMPTY) return true; + while (baseIterator.hasNext()) { + nextObject = baseIterator.next(); + if (isIncluded(nextObject)) { + return true; + } + } + nextObject = DONE; + return false; + } + + public abstract boolean isIncluded(Object item); + } + + public static class PrefixIterator extends FilteredIterator { + private String prefix; + + public PrefixIterator set(Iterator baseIterator, String prefix) { + super.set(baseIterator); + this.prefix = prefix; + return this; + } + + public boolean isIncluded(Object item) { + return ((String) item).startsWith(prefix); + } + } + + public static class RegexIterator extends FilteredIterator { + private Matcher matcher; + + public RegexIterator set(Iterator baseIterator, Matcher matcher) { + super.set(baseIterator); + this.matcher = matcher; + return this; + } + + public boolean isIncluded(Object item) { + return matcher.reset((String) item).matches(); + } + } + + /** + * Compare, allowing nulls + * + * @param a + * @param b + * @return + */ + public static boolean equals(T a, T b) { + return a == null ? b == null : b == null ? false : a.equals(b); + } + + /** + * Compare, allowing nulls and putting them first + * + * @param a + * @param b + * @return + */ + public static int compare(T a, T b) { + return a == null ? b == null ? 0 : -1 : b == null ? 1 : a.compareTo(b); + } + + /** + * Compare iterators + * + * @param iterator1 + * @param iterator2 + * @return + */ + public static int compare(Iterator iterator1, Iterator iterator2) { + int diff; + while (true) { + if (!iterator1.hasNext()) { + return iterator2.hasNext() ? -1 : 0; + } else if (!iterator2.hasNext()) { + return 1; + } + diff = CollectionUtilities.compare(iterator1.next(), iterator2.next()); + if (diff != 0) { + return diff; + } + } + } + + /** + * Compare, with shortest first, and otherwise lexicographically + * + * @param a + * @param b + * @return + */ + public static > int compare(U o1, U o2) { + int diff = o1.size() - o2.size(); + if (diff != 0) { + return diff; + } + Iterator iterator1 = o1.iterator(); + Iterator iterator2 = o2.iterator(); + return compare(iterator1, iterator2); + } + + /** + * Compare, with shortest first, and otherwise lexicographically + * + * @param a + * @param b + * @return + */ + public static > int compare(U o1, U o2) { + int diff = o1.size() - o2.size(); + if (diff != 0) { + return diff; + } + Collection x1 = SortedSet.class.isInstance(o1) ? o1 : new TreeSet(o1); + Collection x2 = SortedSet.class.isInstance(o2) ? o2 : new TreeSet(o2); + return compare(x1, x2); + } + + public static class SetComparator implements Comparator> { + public int compare(Set o1, Set o2) { + return CollectionUtilities.compare(o1, o2); + } + } + ; + + public static class CollectionComparator + implements Comparator> { + public int compare(Collection o1, Collection o2) { + return CollectionUtilities.compare(o1, o2); + } + } + ; + + /** + * Compare, allowing nulls and putting them first + * + * @param a + * @param b + * @return + */ + public static > int compare( + T a, T b) { + if (a == null) { + return b == null ? 0 : -1; + } else if (b == null) { + return 1; + } + int diff = compare(a.getKey(), b.getKey()); + if (diff != 0) { + return diff; + } + return compare(a.getValue(), b.getValue()); + } + + public static > + int compareEntrySets(Collection o1, Collection o2) { + int diff = o1.size() - o2.size(); + if (diff != 0) { + return diff; + } + Iterator iterator1 = o1.iterator(); + Iterator iterator2 = o2.iterator(); + while (true) { + if (!iterator1.hasNext()) { + return iterator2.hasNext() ? -1 : 0; + } else if (!iterator2.hasNext()) { + return 1; + } + T item1 = iterator1.next(); + T item2 = iterator2.next(); + diff = CollectionUtilities.compare(item1, item2); + if (diff != 0) { + return diff; + } + } + } + + public static class MapComparator + implements Comparator> { + public int compare(Map o1, Map o2) { + return CollectionUtilities.compareEntrySets(o1.entrySet(), o2.entrySet()); + } + } + ; + + public static class ComparableComparator implements Comparator { + public int compare(T arg0, T arg1) { + return CollectionUtilities.compare(arg0, arg1); + } + } +} diff --git a/unicodetools/src/test/java/com/ibm/icu/dev/util/TestCollectionUtilities.java b/unicodetools/src/test/java/com/ibm/icu/dev/util/TestCollectionUtilities.java new file mode 100644 index 000000000..4c40d6d96 --- /dev/null +++ b/unicodetools/src/test/java/com/ibm/icu/dev/util/TestCollectionUtilities.java @@ -0,0 +1,197 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************************* + * Copyright (C) 1996-2016, International Business Machines Corporation and * + * others. All Rights Reserved. * + ******************************************************************************* + */ +package com.ibm.icu.dev.test.util; + +// This file was migrated from the ICU4J repo, +// path icu4j/main//translit/src/test/java/com/ibm/icu/dev/test/util/TestUtilities.java + +import com.ibm.icu.dev.util.CollectionUtilities; +import java.util.Arrays; +import java.util.Collection; +import java.util.Set; +import java.util.TreeSet; +import org.junit.jupiter.api.Test; +import org.unicode.unittest.TestFmwkMinusMinus; + +public class TestCollectionUtilities extends TestFmwkMinusMinus { + @Test + public void TestCollectionUtilitySpeed() { + TreeSet ts1 = new TreeSet(); + TreeSet ts2 = new TreeSet(); + int size = 1000; + int iterations = 1000; + String prefix = "abc"; + String postfix = "nop"; + for (int i = 0; i < size; ++i) { + ts1.add(prefix + String.valueOf(i) + postfix); + ts2.add(prefix + String.valueOf(i) + postfix); + } + // warm up + CollectionUtilities.containsAll(ts1, ts2); + ts1.containsAll(ts2); + + timeAndCompare(ts1, ts2, iterations, true, .75); + // now different sets + ts1.add("Able"); + timeAndCompare(ts1, ts2, iterations, true, .75); + timeAndCompare(ts2, ts1, iterations * 100, false, 1.05); + } + + private void timeAndCompare( + TreeSet ts1, TreeSet ts2, int iterations, boolean expected, double factorOfStandard) { + double utilityTimeSorted = + timeUtilityContainsAll(iterations, ts1, ts2, expected) / (double) iterations; + double standardTimeSorted = + timeStandardContainsAll(iterations, ts1, ts2, expected) / (double) iterations; + + if (utilityTimeSorted < standardTimeSorted * factorOfStandard) { + logln( + "Sorted: Utility time (" + + utilityTimeSorted + + ") << Standard duration (" + + standardTimeSorted + + "); " + + 100 * (utilityTimeSorted / standardTimeSorted) + + "%"); + } else { + /*errln*/ logln( + "Sorted: Utility time (" + + utilityTimeSorted + + ") !<< Standard duration (" + + standardTimeSorted + + "); " + + 100 * (utilityTimeSorted / standardTimeSorted) + + "%"); + } + } + + private long timeStandardContainsAll(int iterations, Set hs1, Set hs2, boolean expected) { + long standardTime; + { + long start, end; + boolean temp = false; + + start = System.currentTimeMillis(); + for (int i = 0; i < iterations; ++i) { + temp = hs1.containsAll(hs2); + if (temp != expected) { + errln("Bad result"); + } + } + end = System.currentTimeMillis(); + standardTime = end - start; + } + return standardTime; + } + + private long timeUtilityContainsAll(int iterations, Set hs1, Set hs2, boolean expected) { + long utilityTime; + { + long start, end; + boolean temp = false; + start = System.currentTimeMillis(); + for (int i = 0; i < iterations; ++i) { + temp = CollectionUtilities.containsAll(hs1, hs2); + if (temp != expected) { + errln("Bad result"); + } + } + end = System.currentTimeMillis(); + utilityTime = end - start; + } + return utilityTime; + } + + @Test + public void TestCollectionUtilities() { + String[][] test = { + {"a", "c", "e", "g", "h", "z"}, + {"b", "d", "f", "h", "w"}, + {"a", "b"}, + {"a", "d"}, + {"d"}, + {} + }; // + int resultMask = 0; + for (int i = 0; i < test.length; ++i) { + Collection a = new TreeSet(Arrays.asList(test[i])); + for (int j = 0; j < test.length; ++j) { + Collection b = new TreeSet(Arrays.asList(test[j])); + int relation = CollectionUtilities.getContainmentRelation(a, b); + resultMask |= (1 << relation); + switch (relation) { + case CollectionUtilities.ALL_EMPTY: + checkContainment(a.size() == 0 && b.size() == 0, a, relation, b); + break; + case CollectionUtilities.NOT_A_SUPERSET_B: + checkContainment(a.size() == 0 && b.size() != 0, a, relation, b); + break; + case CollectionUtilities.NOT_A_DISJOINT_B: + checkContainment(a.equals(b) && a.size() != 0, a, relation, b); + break; + case CollectionUtilities.NOT_A_SUBSET_B: + checkContainment(a.size() != 0 && b.size() == 0, a, relation, b); + break; + case CollectionUtilities.A_PROPER_SUBSET_OF_B: + checkContainment(b.containsAll(a) && !a.equals(b), a, relation, b); + break; + case CollectionUtilities.NOT_A_EQUALS_B: + checkContainment( + !CollectionUtilities.containsSome(a, b) + && a.size() != 0 + && b.size() != 0, + a, + relation, + b); + break; + case CollectionUtilities.A_PROPER_SUPERSET_B: + checkContainment(a.containsAll(b) && !a.equals(b), a, relation, b); + break; + case CollectionUtilities.A_PROPER_OVERLAPS_B: + checkContainment( + !b.containsAll(a) + && !a.containsAll(b) + && CollectionUtilities.containsSome(a, b), + a, + relation, + b); + break; + } + } + } + if (resultMask != 0xFF) { + String missing = ""; + for (int i = 0; i < 8; ++i) { + if ((resultMask & (1 << i)) == 0) { + if (missing.length() != 0) missing += ", "; + missing += RelationName[i]; + } + } + errln("Not all ContainmentRelations checked: " + missing); + } + } + + static final String[] RelationName = { + "ALL_EMPTY", + "NOT_A_SUPERSET_B", + "NOT_A_DISJOINT_B", + "NOT_A_SUBSET_B", + "A_PROPER_SUBSET_OF_B", + "A_PROPER_DISJOINT_B", + "A_PROPER_SUPERSET_B", + "A_PROPER_OVERLAPS_B" + }; + + /** */ + private void checkContainment(boolean c, Collection a, int relation, Collection b) { + if (!c) { + errln("Fails relation: " + a + " \t" + RelationName[relation] + " \t" + b); + } + } +} diff --git a/unicodetools/src/test/java/org/unicode/text/UCD/TestCodeInvariants.java b/unicodetools/src/test/java/org/unicode/text/UCD/TestCodeInvariants.java index 9ad41c865..601354451 100644 --- a/unicodetools/src/test/java/org/unicode/text/UCD/TestCodeInvariants.java +++ b/unicodetools/src/test/java/org/unicode/text/UCD/TestCodeInvariants.java @@ -157,6 +157,9 @@ public void testGcbInDecompositions() { continue; } + // TODO: Use the Unicode Tools normalization code to fetch the Decomposition_Mapping, + // not ICU. Using ICU makes this test depend on the ICU version that the + // Maven dependency declares. final String nfdOrNull = NORM2_NFD.getDecomposition(cp); if (nfdOrNull == null || nfdOrNull.length() <= 1) { continue; @@ -165,10 +168,16 @@ public void testGcbInDecompositions() { int ch; boolean flagged = false; for (int i = 0; i < nfdOrNull.length(); i += Character.charCount(ch)) { - ch = UTF16.charAt(nfdOrNull, i); - if ((i > 0) - && (GCB.get(ch) - != UcdPropertyValues.Grapheme_Cluster_Break_Values.Extend)) { + ch = Character.codePointAt(nfdOrNull, i); + if (i == 0) { + continue; + } + // We normally expect the non-initial character in the Decomposition_Mapping + // to be an extender. + // Some Kirat Rai vowel signs use the value for Jamo vowels instead. + UcdPropertyValues.Grapheme_Cluster_Break_Values gcb = GCB.get(ch); + if (!(gcb == UcdPropertyValues.Grapheme_Cluster_Break_Values.Extend + || gcb == UcdPropertyValues.Grapheme_Cluster_Break_Values.V)) { flagged = true; testResult = TEST_FAIL; }