diff --git a/unicodetools/src/main/java/com/ibm/icu/dev/util/CollectionUtilities.java b/unicodetools/src/main/java/com/ibm/icu/dev/util/CollectionUtilities.java index 79ff7a633..b965f5511 100644 --- a/unicodetools/src/main/java/com/ibm/icu/dev/util/CollectionUtilities.java +++ b/unicodetools/src/main/java/com/ibm/icu/dev/util/CollectionUtilities.java @@ -13,17 +13,12 @@ import com.ibm.icu.text.UTF16; import com.ibm.icu.text.UnicodeSet; -import com.ibm.icu.text.UnicodeSetIterator; import java.util.Collection; import java.util.Comparator; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.Map.Entry; -import java.util.Set; -import java.util.SortedSet; -import java.util.TreeSet; -import java.util.regex.Matcher; /** * Utilities that ought to be on collections, but aren't @@ -62,7 +57,7 @@ public static String join(T[] array, String separator) { public static > String join(U collection, String separator) { StringBuffer result = new StringBuffer(); boolean first = true; - for (Iterator it = collection.iterator(); it.hasNext(); ) { + for (Iterator it = collection.iterator(); it.hasNext(); ) { if (first) first = false; else result.append(separator); result.append(it.next()); @@ -91,37 +86,6 @@ public static Map asMap(T[][] source, Map target, boolean revers return target; } - /** - * Add all items in iterator to target collection - * - * @param - * @param - * @param source - * @param target - * @return - */ - public static > U addAll(Iterator source, U target) { - while (source.hasNext()) { - target.add(source.next()); - } - return target; // for chaining - } - - /** - * Get the size of an iterator (number of items in it). - * - * @param source - * @return - */ - public static int size(Iterator source) { - int result = 0; - while (source.hasNext()) { - source.next(); - ++result; - } - return result; - } - /** * @param * @param source @@ -131,37 +95,6 @@ public static Map asMap(T[][] source) { return asMap(source, new HashMap(), false); } - /** - * Utility that ought to be on Map - * - * @param m - * @param itemsToRemove - * @param - * @param - * @return map passed in - */ - public static Map removeAll(Map m, Collection itemsToRemove) { - for (Iterator it = itemsToRemove.iterator(); it.hasNext(); ) { - Object item = it.next(); - m.remove(item); - } - return m; - } - - /** - * Get first item in collection, or null if there is none. - * - * @param - * @param - * @param c - * @return first item - */ - public > T getFirst(U c) { - Iterator it = c.iterator(); - if (!it.hasNext()) return null; - return it.next(); - } - /** * Get the "best" in collection. That is the least if direction is < 0, otherwise the greatest. * The first is chosen if there are multiples. @@ -197,235 +130,6 @@ public static > T getBest(U c, Comparator comp, in return bestSoFar; } - /** - * Matches item. - * - * @param - */ - public interface ObjectMatcher { - /** - * Must handle null, never throw exception - * - * @param o - * @return - */ - boolean matches(T o); - } - - /** - * Reverse a match - * - * @param - */ - public static class InverseMatcher implements ObjectMatcher { - ObjectMatcher other; - /** - * @param toInverse - * @return - */ - public ObjectMatcher set(ObjectMatcher toInverse) { - other = toInverse; - return this; - } - - public boolean matches(T value) { - return !other.matches(value); - } - } - - /** - * Remove matching items - * - * @param - * @param - * @param c - * @param f - * @return - */ - public static > U removeAll(U c, ObjectMatcher f) { - for (Iterator it = c.iterator(); it.hasNext(); ) { - T item = it.next(); - if (f.matches(item)) it.remove(); - } - return c; - } - - /** - * Retain matching items - * - * @param - * @param - * @param c - * @param f - * @return - */ - public static > U retainAll(U c, ObjectMatcher f) { - for (Iterator it = c.iterator(); it.hasNext(); ) { - T item = it.next(); - if (!f.matches(item)) it.remove(); - } - return c; - } - - /** - * @param a - * @param b - * @return - */ - public static boolean containsSome(Collection a, Collection b) { - // fast paths - if (a.size() == 0 || b.size() == 0) return false; - if (a == b) return true; // must test after size test. - - if (a instanceof SortedSet && b instanceof SortedSet) { - SortedSet aa = (SortedSet) a; - SortedSet bb = (SortedSet) b; - Comparator bbc = bb.comparator(); - Comparator aac = aa.comparator(); - if (bbc == null && aac == null) { - Iterator ai = aa.iterator(); - Iterator bi = bb.iterator(); - Comparable ao = (Comparable) ai.next(); // these are ok, since the sizes are != 0 - Comparable bo = (Comparable) bi.next(); - while (true) { - int rel = ao.compareTo(bo); - if (rel < 0) { - if (!ai.hasNext()) return false; - ao = (Comparable) ai.next(); - } else if (rel > 0) { - if (!bi.hasNext()) return false; - bo = (Comparable) bi.next(); - } else { - return true; - } - } - } else if (bbc.equals(a)) { - Iterator ai = aa.iterator(); - Iterator bi = bb.iterator(); - Object ao = ai.next(); // these are ok, since the sizes are != 0 - Object bo = bi.next(); - while (true) { - int rel = aac.compare(ao, bo); - if (rel < 0) { - if (!ai.hasNext()) return false; - ao = ai.next(); - } else if (rel > 0) { - if (!bi.hasNext()) return false; - bo = bi.next(); - } else { - return true; - } - } - } - } - for (Iterator it = a.iterator(); it.hasNext(); ) { - if (b.contains(it.next())) return true; - } - return false; - } - - public static boolean containsAll(Collection a, Collection b) { - // fast paths - if (a == b) return true; - if (b.size() == 0) return true; - if (a.size() < b.size()) return false; - - if (a instanceof SortedSet && b instanceof SortedSet) { - SortedSet aa = (SortedSet) a; - SortedSet bb = (SortedSet) b; - Comparator bbc = bb.comparator(); - Comparator aac = aa.comparator(); - if (bbc == null && aac == null) { - Iterator ai = aa.iterator(); - Iterator bi = bb.iterator(); - Comparable ao = (Comparable) ai.next(); // these are ok, since the sizes are != 0 - Comparable bo = (Comparable) bi.next(); - while (true) { - int rel = ao.compareTo(bo); - if (rel == 0) { - if (!bi.hasNext()) return true; - if (!ai.hasNext()) return false; - bo = (Comparable) bi.next(); - ao = (Comparable) ai.next(); - } else if (rel < 0) { - if (!ai.hasNext()) return false; - ao = (Comparable) ai.next(); - } else { - return false; - } - } - } else if (bbc.equals(aac)) { - Iterator ai = aa.iterator(); - Iterator bi = bb.iterator(); - Object ao = ai.next(); // these are ok, since the sizes are != 0 - Object bo = bi.next(); - while (true) { - int rel = aac.compare(ao, bo); - if (rel == 0) { - if (!bi.hasNext()) return true; - if (!ai.hasNext()) return false; - bo = bi.next(); - ao = ai.next(); - } else if (rel < 0) { - if (!ai.hasNext()) return false; - ao = ai.next(); - } else { - return false; - } - } - } - } - return a.containsAll(b); - } - - public static boolean containsNone(Collection a, Collection b) { - return !containsSome(a, b); - } - - /** Used for results of getContainmentRelation */ - public static final int ALL_EMPTY = 0, - NOT_A_SUPERSET_B = 1, - NOT_A_DISJOINT_B = 2, - NOT_A_SUBSET_B = 4, - NOT_A_EQUALS_B = NOT_A_SUBSET_B | NOT_A_SUPERSET_B, - A_PROPER_SUBSET_OF_B = NOT_A_DISJOINT_B | NOT_A_SUPERSET_B, - A_PROPER_SUPERSET_B = NOT_A_SUBSET_B | NOT_A_DISJOINT_B, - A_PROPER_OVERLAPS_B = NOT_A_SUBSET_B | NOT_A_DISJOINT_B | NOT_A_SUPERSET_B; - - /** - * Assesses all the possible containment relations between collections A and B with one call. - *
- * Returns an int with bits set, according to a "Venn Diagram" view of A vs B.
- * NOT_A_SUPERSET_B: a - b != {}
- * NOT_A_DISJOINT_B: a * b != {} // * is intersects
- * NOT_A_SUBSET_B: b - a != {}
- * Thus the bits can be used to get the following relations:
- * for A_SUPERSET_B, use (x & CollectionUtilities.NOT_A_SUPERSET_B) == 0
- * for A_SUBSET_B, use (x & CollectionUtilities.NOT_A_SUBSET_B) == 0
- * for A_EQUALS_B, use (x & CollectionUtilities.NOT_A_EQUALS_B) == 0
- * for A_DISJOINT_B, use (x & CollectionUtilities.NOT_A_DISJOINT_B) == 0
- * for A_OVERLAPS_B, use (x & CollectionUtilities.NOT_A_DISJOINT_B) != 0
- */ - public static int getContainmentRelation(Collection a, Collection b) { - if (a.size() == 0) { - return (b.size() == 0) ? ALL_EMPTY : NOT_A_SUPERSET_B; - } else if (b.size() == 0) { - return NOT_A_SUBSET_B; - } - int result = 0; - // WARNING: one might think that the following can be short-circuited, by looking at - // the sizes of a and b. However, this would fail in general, where a different comparator - // is being - // used in the two collections. Unfortunately, there is no failsafe way to test for that. - for (Iterator it = a.iterator(); result != 6 && it.hasNext(); ) { - result |= (b.contains(it.next())) ? NOT_A_DISJOINT_B : NOT_A_SUBSET_B; - } - for (Iterator it = b.iterator(); (result & 3) != 3 && it.hasNext(); ) { - result |= (a.contains(it.next())) ? NOT_A_DISJOINT_B : NOT_A_SUPERSET_B; - } - return result; - } - public static String remove(String source, UnicodeSet removals) { StringBuffer result = new StringBuffer(); int cp; @@ -436,159 +140,6 @@ public static String remove(String source, UnicodeSet removals) { return result.toString(); } - /** - * Does one string contain another, starting at a specific offset? - * - * @param text - * @param offset - * @param other - * @return - */ - public static int matchesAt(CharSequence text, int offset, CharSequence other) { - int len = other.length(); - int i = 0; - int j = offset; - for (; i < len; ++i, ++j) { - char pc = other.charAt(i); - char tc = text.charAt(j); - if (pc != tc) return -1; - } - return i; - } - - /** - * Returns the ending offset found by matching characters with testSet, until a position is - * found that doen't match - * - * @param string - * @param offset - * @param testSet - * @return - */ - public int span(CharSequence string, int offset, UnicodeSet testSet) { - while (true) { - int newOffset = testSet.matchesAt(string, offset); - if (newOffset < 0) return offset; - } - } - - /** - * Returns the ending offset found by matching characters with testSet, until a position is - * found that does match - * - * @param string - * @param offset - * @param testSet - * @return - */ - public int spanNot(CharSequence string, int offset, UnicodeSet testSet) { - while (true) { - int newOffset = testSet.matchesAt(string, offset); - if (newOffset >= 0) return offset; - ++offset; // try next character position - // we don't have to worry about surrogates for this. - } - } - - /** - * Modifies Unicode set to flatten the strings. Eg [abc{da}] => [abcd] Returns the set for - * chaining. - * - * @param exemplar1 - * @return - */ - public static UnicodeSet flatten(UnicodeSet exemplar1) { - UnicodeSet result = new UnicodeSet(); - boolean gotString = false; - for (UnicodeSetIterator it = new UnicodeSetIterator(exemplar1); it.nextRange(); ) { - if (it.codepoint == UnicodeSetIterator.IS_STRING) { - result.addAll(it.string); - gotString = true; - } else { - result.add(it.codepoint, it.codepointEnd); - } - } - if (gotString) exemplar1.set(result); - return exemplar1; - } - - /** For producing filtered iterators */ - public abstract static class FilteredIterator implements Iterator { - private Iterator baseIterator; - private static final Object EMPTY = new Object(); - private static final Object DONE = new Object(); - private Object nextObject = EMPTY; - - public FilteredIterator set(Iterator baseIterator) { - this.baseIterator = baseIterator; - return this; - } - - public void remove() { - throw new UnsupportedOperationException("Doesn't support removal"); - } - - public Object next() { - Object result = nextObject; - nextObject = EMPTY; - return result; - } - - public boolean hasNext() { - if (nextObject == DONE) return false; - if (nextObject != EMPTY) return true; - while (baseIterator.hasNext()) { - nextObject = baseIterator.next(); - if (isIncluded(nextObject)) { - return true; - } - } - nextObject = DONE; - return false; - } - - public abstract boolean isIncluded(Object item); - } - - public static class PrefixIterator extends FilteredIterator { - private String prefix; - - public PrefixIterator set(Iterator baseIterator, String prefix) { - super.set(baseIterator); - this.prefix = prefix; - return this; - } - - public boolean isIncluded(Object item) { - return ((String) item).startsWith(prefix); - } - } - - public static class RegexIterator extends FilteredIterator { - private Matcher matcher; - - public RegexIterator set(Iterator baseIterator, Matcher matcher) { - super.set(baseIterator); - this.matcher = matcher; - return this; - } - - public boolean isIncluded(Object item) { - return matcher.reset((String) item).matches(); - } - } - - /** - * Compare, allowing nulls - * - * @param a - * @param b - * @return - */ - public static boolean equals(T a, T b) { - return a == null ? b == null : b == null ? false : a.equals(b); - } - /** * Compare, allowing nulls and putting them first * @@ -596,7 +147,7 @@ public static boolean equals(T a, T b) { * @param b * @return */ - public static int compare(T a, T b) { + public static > int compare(T a, T b) { return a == null ? b == null ? 0 : -1 : b == null ? 1 : a.compareTo(b); } @@ -607,7 +158,8 @@ public static int compare(T a, T b) { * @param iterator2 * @return */ - public static int compare(Iterator iterator1, Iterator iterator2) { + public static > int compare( + Iterator iterator1, Iterator iterator2) { int diff; while (true) { if (!iterator1.hasNext()) { @@ -629,7 +181,7 @@ public static int compare(Iterator iterator1, Iterator * @param b * @return */ - public static > int compare(U o1, U o2) { + public static , U extends Collection> int compare(U o1, U o2) { int diff = o1.size() - o2.size(); if (diff != 0) { return diff; @@ -639,37 +191,12 @@ public static > int compare(U o1, return compare(iterator1, iterator2); } - /** - * Compare, with shortest first, and otherwise lexicographically - * - * @param a - * @param b - * @return - */ - public static > int compare(U o1, U o2) { - int diff = o1.size() - o2.size(); - if (diff != 0) { - return diff; - } - Collection x1 = SortedSet.class.isInstance(o1) ? o1 : new TreeSet(o1); - Collection x2 = SortedSet.class.isInstance(o2) ? o2 : new TreeSet(o2); - return compare(x1, x2); - } - - public static class SetComparator implements Comparator> { - public int compare(Set o1, Set o2) { - return CollectionUtilities.compare(o1, o2); - } - } - ; - - public static class CollectionComparator + public static class CollectionComparator> implements Comparator> { public int compare(Collection o1, Collection o2) { return CollectionUtilities.compare(o1, o2); } } - ; /** * Compare, allowing nulls and putting them first @@ -678,8 +205,8 @@ public int compare(Collection o1, Collection o2) { * @param b * @return */ - public static > int compare( - T a, T b) { + public static , V extends Comparable, T extends Entry> + int compare(T a, T b) { if (a == null) { return b == null ? 0 : -1; } else if (b == null) { @@ -691,41 +218,4 @@ public static } return compare(a.getValue(), b.getValue()); } - - public static > - int compareEntrySets(Collection o1, Collection o2) { - int diff = o1.size() - o2.size(); - if (diff != 0) { - return diff; - } - Iterator iterator1 = o1.iterator(); - Iterator iterator2 = o2.iterator(); - while (true) { - if (!iterator1.hasNext()) { - return iterator2.hasNext() ? -1 : 0; - } else if (!iterator2.hasNext()) { - return 1; - } - T item1 = iterator1.next(); - T item2 = iterator2.next(); - diff = CollectionUtilities.compare(item1, item2); - if (diff != 0) { - return diff; - } - } - } - - public static class MapComparator - implements Comparator> { - public int compare(Map o1, Map o2) { - return CollectionUtilities.compareEntrySets(o1.entrySet(), o2.entrySet()); - } - } - ; - - public static class ComparableComparator implements Comparator { - public int compare(T arg0, T arg1) { - return CollectionUtilities.compare(arg0, arg1); - } - } } diff --git a/unicodetools/src/main/java/org/unicode/tools/emoji/GenerateEmoji.java b/unicodetools/src/main/java/org/unicode/tools/emoji/GenerateEmoji.java index 9eee96f9c..c43e08bd7 100644 --- a/unicodetools/src/main/java/org/unicode/tools/emoji/GenerateEmoji.java +++ b/unicodetools/src/main/java/org/unicode/tools/emoji/GenerateEmoji.java @@ -10,7 +10,6 @@ import com.google.common.collect.Multimap; import com.google.common.collect.TreeMultimap; import com.ibm.icu.dev.util.CollectionUtilities; -import com.ibm.icu.dev.util.CollectionUtilities.SetComparator; import com.ibm.icu.dev.util.UnicodeMap; import com.ibm.icu.impl.Relation; import com.ibm.icu.impl.Row; @@ -3626,8 +3625,6 @@ public static int compareX( static final Comparator, UnicodeSet>> PAIR_SORT = new Comparator, UnicodeSet>>() { - SetComparator setComp; - public int compare(R2, UnicodeSet> o1, R2, UnicodeSet> o2) { int diff = compareX( diff --git a/unicodetools/src/test/java/com/ibm/icu/dev/util/TestCollectionUtilities.java b/unicodetools/src/test/java/com/ibm/icu/dev/util/TestCollectionUtilities.java deleted file mode 100644 index 4c40d6d96..000000000 --- a/unicodetools/src/test/java/com/ibm/icu/dev/util/TestCollectionUtilities.java +++ /dev/null @@ -1,197 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* - ******************************************************************************* - * Copyright (C) 1996-2016, International Business Machines Corporation and * - * others. All Rights Reserved. * - ******************************************************************************* - */ -package com.ibm.icu.dev.test.util; - -// This file was migrated from the ICU4J repo, -// path icu4j/main//translit/src/test/java/com/ibm/icu/dev/test/util/TestUtilities.java - -import com.ibm.icu.dev.util.CollectionUtilities; -import java.util.Arrays; -import java.util.Collection; -import java.util.Set; -import java.util.TreeSet; -import org.junit.jupiter.api.Test; -import org.unicode.unittest.TestFmwkMinusMinus; - -public class TestCollectionUtilities extends TestFmwkMinusMinus { - @Test - public void TestCollectionUtilitySpeed() { - TreeSet ts1 = new TreeSet(); - TreeSet ts2 = new TreeSet(); - int size = 1000; - int iterations = 1000; - String prefix = "abc"; - String postfix = "nop"; - for (int i = 0; i < size; ++i) { - ts1.add(prefix + String.valueOf(i) + postfix); - ts2.add(prefix + String.valueOf(i) + postfix); - } - // warm up - CollectionUtilities.containsAll(ts1, ts2); - ts1.containsAll(ts2); - - timeAndCompare(ts1, ts2, iterations, true, .75); - // now different sets - ts1.add("Able"); - timeAndCompare(ts1, ts2, iterations, true, .75); - timeAndCompare(ts2, ts1, iterations * 100, false, 1.05); - } - - private void timeAndCompare( - TreeSet ts1, TreeSet ts2, int iterations, boolean expected, double factorOfStandard) { - double utilityTimeSorted = - timeUtilityContainsAll(iterations, ts1, ts2, expected) / (double) iterations; - double standardTimeSorted = - timeStandardContainsAll(iterations, ts1, ts2, expected) / (double) iterations; - - if (utilityTimeSorted < standardTimeSorted * factorOfStandard) { - logln( - "Sorted: Utility time (" - + utilityTimeSorted - + ") << Standard duration (" - + standardTimeSorted - + "); " - + 100 * (utilityTimeSorted / standardTimeSorted) - + "%"); - } else { - /*errln*/ logln( - "Sorted: Utility time (" - + utilityTimeSorted - + ") !<< Standard duration (" - + standardTimeSorted - + "); " - + 100 * (utilityTimeSorted / standardTimeSorted) - + "%"); - } - } - - private long timeStandardContainsAll(int iterations, Set hs1, Set hs2, boolean expected) { - long standardTime; - { - long start, end; - boolean temp = false; - - start = System.currentTimeMillis(); - for (int i = 0; i < iterations; ++i) { - temp = hs1.containsAll(hs2); - if (temp != expected) { - errln("Bad result"); - } - } - end = System.currentTimeMillis(); - standardTime = end - start; - } - return standardTime; - } - - private long timeUtilityContainsAll(int iterations, Set hs1, Set hs2, boolean expected) { - long utilityTime; - { - long start, end; - boolean temp = false; - start = System.currentTimeMillis(); - for (int i = 0; i < iterations; ++i) { - temp = CollectionUtilities.containsAll(hs1, hs2); - if (temp != expected) { - errln("Bad result"); - } - } - end = System.currentTimeMillis(); - utilityTime = end - start; - } - return utilityTime; - } - - @Test - public void TestCollectionUtilities() { - String[][] test = { - {"a", "c", "e", "g", "h", "z"}, - {"b", "d", "f", "h", "w"}, - {"a", "b"}, - {"a", "d"}, - {"d"}, - {} - }; // - int resultMask = 0; - for (int i = 0; i < test.length; ++i) { - Collection a = new TreeSet(Arrays.asList(test[i])); - for (int j = 0; j < test.length; ++j) { - Collection b = new TreeSet(Arrays.asList(test[j])); - int relation = CollectionUtilities.getContainmentRelation(a, b); - resultMask |= (1 << relation); - switch (relation) { - case CollectionUtilities.ALL_EMPTY: - checkContainment(a.size() == 0 && b.size() == 0, a, relation, b); - break; - case CollectionUtilities.NOT_A_SUPERSET_B: - checkContainment(a.size() == 0 && b.size() != 0, a, relation, b); - break; - case CollectionUtilities.NOT_A_DISJOINT_B: - checkContainment(a.equals(b) && a.size() != 0, a, relation, b); - break; - case CollectionUtilities.NOT_A_SUBSET_B: - checkContainment(a.size() != 0 && b.size() == 0, a, relation, b); - break; - case CollectionUtilities.A_PROPER_SUBSET_OF_B: - checkContainment(b.containsAll(a) && !a.equals(b), a, relation, b); - break; - case CollectionUtilities.NOT_A_EQUALS_B: - checkContainment( - !CollectionUtilities.containsSome(a, b) - && a.size() != 0 - && b.size() != 0, - a, - relation, - b); - break; - case CollectionUtilities.A_PROPER_SUPERSET_B: - checkContainment(a.containsAll(b) && !a.equals(b), a, relation, b); - break; - case CollectionUtilities.A_PROPER_OVERLAPS_B: - checkContainment( - !b.containsAll(a) - && !a.containsAll(b) - && CollectionUtilities.containsSome(a, b), - a, - relation, - b); - break; - } - } - } - if (resultMask != 0xFF) { - String missing = ""; - for (int i = 0; i < 8; ++i) { - if ((resultMask & (1 << i)) == 0) { - if (missing.length() != 0) missing += ", "; - missing += RelationName[i]; - } - } - errln("Not all ContainmentRelations checked: " + missing); - } - } - - static final String[] RelationName = { - "ALL_EMPTY", - "NOT_A_SUPERSET_B", - "NOT_A_DISJOINT_B", - "NOT_A_SUBSET_B", - "A_PROPER_SUBSET_OF_B", - "A_PROPER_DISJOINT_B", - "A_PROPER_SUPERSET_B", - "A_PROPER_OVERLAPS_B" - }; - - /** */ - private void checkContainment(boolean c, Collection a, int relation, Collection b) { - if (!c) { - errln("Fails relation: " + a + " \t" + RelationName[relation] + " \t" + b); - } - } -}