Skip to content

Commit

Permalink
Merge branch '2.0.X'
Browse files Browse the repository at this point in the history
  • Loading branch information
leerho committed Feb 5, 2021
2 parents e8987b3 + 9de80d7 commit ac00afa
Show file tree
Hide file tree
Showing 24 changed files with 1,098 additions and 933 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ under the License.

<groupId>org.apache.datasketches</groupId>
<artifactId>datasketches-java</artifactId>
<version>1.4.0-SNAPSHOT</version>
<version>2.0.0</version>
<packaging>jar</packaging>

<name>${project.artifactId}</name>
Expand Down
11 changes: 6 additions & 5 deletions src/main/java/org/apache/datasketches/theta/AnotB.java
Original file line number Diff line number Diff line change
Expand Up @@ -108,20 +108,21 @@ public Family getFamily() {
public abstract void notB(Sketch skB);

/**
* Gets the result of the mutistep, stateful operation AnotB that have been executed with calls
* Gets the result of the multistep, stateful operation AnotB that has been executed with calls
* to {@link #setA(Sketch)} and ({@link #notB(Sketch)} or
* {@link #notB(org.apache.datasketches.theta.Sketch)}).
*
* @param reset If <i>true</i>, clears this operator to the empty state after this result is
* returned. Set this to <i>false</i> if you wish to obtain an intermediate result.
*
* @return the result of this operation as an ordered, on-heap {@link CompactSketch}.
*/
public abstract CompactSketch getResult(boolean reset);

/**
* Gets the result of this stateful set operation as a CompactSketch of the form based on
* the input arguments.
* The stateful input operations are {@link #setA(Sketch)} and {@link #notB(Sketch)}.
* Gets the result of the multistep, stateful operation AnotB that has been executed with calls
* to {@link #setA(Sketch)} and ({@link #notB(Sketch)} or
* {@link #notB(org.apache.datasketches.theta.Sketch)}).
*
* @param dstOrdered If <i>true</i>, the result will be an ordered {@link CompactSketch}.
* <a href="{@docRoot}/resources/dictionary.html#dstOrdered">See Destination Ordered</a>.
Expand All @@ -132,7 +133,7 @@ public Family getFamily() {
* @param reset If <i>true</i>, clears this operator to the empty state after this result is
* returned. Set this to <i>false</i> if you wish to obtain an intermediate result.
*
* @return the result of this operation as a {@link CompactSketch} of the chosen form.
* @return the result of this operation as a {@link CompactSketch} in the given dstMem.
*/
public abstract CompactSketch getResult(boolean dstOrdered, WritableMemory dstMem, boolean reset);

Expand Down
14 changes: 9 additions & 5 deletions src/main/java/org/apache/datasketches/theta/Intersection.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,19 @@ public Family getFamily() {

/**
* Gets the result of this operation as an ordered CompactSketch on the Java heap.
* The {@link #intersect(Sketch)} method must have been called at least once.
* This does not disturb the underlying data structure of this intersection.
* The {@link #intersect(Sketch)} method must have been called at least once, otherwise an
* exception will be thrown. This is because a virgin Intersection object represents the
* Universal Set, which has an infinite number of values.
* @return the result of this operation as an ordered CompactSketch on the Java heap
*/
public CompactSketch getResult() {
// Delegates to getResult(dstOrdered, dstMem) with dstOrdered = true and no destination Memory.
return getResult(true, null);
}

/**
* Gets the result of this operation as a CompactSketch of the chosen form.
* Gets the result of this operation as a CompactSketch in the given dstMem.
* This does not disturb the underlying data structure of this intersection.
* The {@link #intersect(Sketch)} method must have been called at least once, otherwise an
* exception will be thrown. This is because a virgin Intersection object represents the
* Universal Set, which has an infinite number of values.
Expand All @@ -70,14 +74,14 @@ public CompactSketch getResult() {
*
* <p>Presenting an intersection with a null argument will throw an exception.</p>
*
*
* @param dstOrdered
* <a href="{@docRoot}/resources/dictionary.html#dstOrdered">See Destination Ordered</a>
*
* @param dstMem
* <a href="{@docRoot}/resources/dictionary.html#dstMem">See Destination Memory</a>.
*
* @return the result of this operation as a CompactSketch of the chosen form
* @return the result of this operation as a CompactSketch stored in the given dstMem,
* which can be either on- or off-heap.
*/
public abstract CompactSketch getResult(boolean dstOrdered, WritableMemory dstMem);

Expand All @@ -90,7 +94,7 @@ public CompactSketch getResult() {
/**
* Resets this Intersection for stateful operations only.
* The seed remains intact, otherwise reverts to
* the Universal Set, theta of 1.0 and empty = false.
* the Universal Set: theta = 1.0, no retained data and empty = false.
*/
public abstract void reset();

Expand Down
40 changes: 20 additions & 20 deletions src/main/java/org/apache/datasketches/theta/IntersectionImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ static IntersectionImpl wrapInstance(
@Override
public CompactSketch intersect(final Sketch a, final Sketch b, final boolean dstOrdered,
final WritableMemory dstMem) {
if ((wmem_ != null) && readOnly_) { throw new SketchesReadOnlyException(); }
if (wmem_ != null && readOnly_) { throw new SketchesReadOnlyException(); }
hardReset();
intersect(a);
intersect(b);
Expand All @@ -233,7 +233,7 @@ public void intersect(final Sketch sketchIn) {
if (sketchIn == null) {
throw new SketchesArgumentException("Intersection argument must not be null.");
}
if ((wmem_ != null) && readOnly_) { throw new SketchesReadOnlyException(); }
if (wmem_ != null && readOnly_) { throw new SketchesReadOnlyException(); }
if (empty_ || sketchIn.isEmpty()) { //empty rule
//Because of the def of null above and the Empty Rule (which is OR), empty_ must be true.
//Whatever the current internal state, we make our local empty.
Expand Down Expand Up @@ -262,14 +262,14 @@ public void intersect(final Sketch sketchIn) {
final int sketchInEntries = sketchIn.getRetainedEntries(true);

//states 1,2,3,6
if ((curCount_ == 0) || (sketchInEntries == 0)) {
if (curCount_ == 0 || sketchInEntries == 0) {
curCount_ = 0;
if (wmem_ != null) { insertCurCount(wmem_, 0); }
hashTable_ = null; //No need for a HT. Don't bother clearing mem if valid
} //end of states 1,2,3,6

// state 5
else if ((curCount_ < 0) && (sketchInEntries > 0)) {
else if (curCount_ < 0 && sketchInEntries > 0) {
curCount_ = sketchIn.getRetainedEntries(true);
final int requiredLgArrLongs = minLgHashTableSize(curCount_, REBUILD_THRESHOLD);
final int priorLgArrLongs = lgArrLongs_; //prior only used in error message
Expand All @@ -295,7 +295,7 @@ else if ((curCount_ < 0) && (sketchInEntries > 0)) {
} //end of state 5

//state 7
else if ((curCount_ > 0) && (sketchInEntries > 0)) {
else if (curCount_ > 0 && sketchInEntries > 0) {
//Sets resulting hashTable, curCount and adjusts lgArrLongs
performIntersect(sketchIn);
} //end of state 7
Expand Down Expand Up @@ -339,6 +339,16 @@ public CompactSketch getResult(final boolean dstOrdered, final WritableMemory ds
dstMem, compactCache);
}

@Override
public boolean hasResult() {
  // Take the retained-entries count from the backing memory when one is attached,
  // otherwise from the on-heap field; a non-negative count means a result exists.
  final int retainedCount = wmem_ == null ? curCount_ : wmem_.getInt(RETAINED_ENTRIES_INT);
  return retainedCount >= 0;
}

@Override
public boolean isSameResource(final Memory that) {
  // With no backing memory attached there is no resource to compare against.
  if (wmem_ == null) { return false; }
  return wmem_.isSameResource(that);
}

@Override
public void reset() {
hardReset();
Expand All @@ -347,7 +357,7 @@ public void reset() {
@Override
public byte[] toByteArray() {
final int preBytes = CONST_PREAMBLE_LONGS << 3;
final int dataBytes = (curCount_ > 0) ? 8 << lgArrLongs_ : 0;
final int dataBytes = curCount_ > 0 ? 8 << lgArrLongs_ : 0;
final byte[] byteArrOut = new byte[preBytes + dataBytes];
if (wmem_ != null) {
wmem_.getByteArray(0, byteArrOut, 0, preBytes + dataBytes);
Expand Down Expand Up @@ -376,16 +386,6 @@ public byte[] toByteArray() {
return byteArrOut;
}

@Override
public boolean hasResult() {
// Reads the retained-entries count from wmem_ when it is set, otherwise from curCount_;
// reports true when that count is non-negative.
return (wmem_ != null) ? wmem_.getInt(RETAINED_ENTRIES_INT) >= 0 : curCount_ >= 0;
}

@Override
public boolean isSameResource(final Memory that) {
// Always false when wmem_ is null; otherwise defers to wmem_.isSameResource(that).
return (wmem_ != null) ? wmem_.isSameResource(that) : false;
}

//restricted

/**
Expand All @@ -405,7 +405,7 @@ boolean isEmpty() {
@Override
long[] getCache() {
if (wmem_ == null) {
return (hashTable_ != null) ? hashTable_ : new long[0];
return hashTable_ != null ? hashTable_ : new long[0];
}
//Direct
final int arrLongs = 1 << lgArrLongs_;
Expand All @@ -426,7 +426,7 @@ long getThetaLong() {

private void performIntersect(final Sketch sketchIn) {
// curCount and input data are nonzero, match against HT
assert ((curCount_ > 0) && (!empty_));
assert curCount_ > 0 && !empty_;
final long[] cacheIn = sketchIn.getCache();
final int arrLongsIn = cacheIn.length;
final long[] hashTable;
Expand Down Expand Up @@ -458,7 +458,7 @@ private void performIntersect(final Sketch sketchIn) {
//either unordered compact or hash table
for (int i = 0; i < arrLongsIn; i++ ) {
final long hashIn = cacheIn[i];
if ((hashIn <= 0L) || (hashIn >= thetaLong_)) { continue; }
if (hashIn <= 0L || hashIn >= thetaLong_) { continue; }
final int foundIdx = hashSearch(hashTable, lgArrLongs_, hashIn);
if (foundIdx == -1) { continue; }
matchSet[matchSetCount++] = hashIn;
Expand Down Expand Up @@ -505,7 +505,7 @@ private void moveDataToTgt(final long[] arr, final int count) {
tmpCnt++;
}
}
assert (tmpCnt == count) : "Intersection Count Check: got: " + tmpCnt + ", expected: " + count;
assert tmpCnt == count : "Intersection Count Check: got: " + tmpCnt + ", expected: " + count;
}

private void hardReset() {
Expand Down
20 changes: 10 additions & 10 deletions src/main/java/org/apache/datasketches/theta/JaccardSimilarity.java
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public final class JaccardSimilarity {
*/
public static double[] jaccard(final Sketch sketchA, final Sketch sketchB) {
//Corner case checks
if ((sketchA == null) || (sketchB == null)) { return ZEROS.clone(); }
if (sketchA == null || sketchB == null) { return ZEROS.clone(); }
if (sketchA == sketchB) { return ONES.clone(); }
if (sketchA.isEmpty() && sketchB.isEmpty()) { return ONES.clone(); }
if (sketchA.isEmpty() || sketchB.isEmpty()) { return ZEROS.clone(); }
Expand All @@ -68,17 +68,17 @@ public static double[] jaccard(final Sketch sketchA, final Sketch sketchB) {
final int newK = max(min(ceilingPowerOf2(countA + countB), maxK), minK);
final Union union =
SetOperation.builder().setNominalEntries(newK).buildUnion();
union.update(sketchA);
union.update(sketchB);
union.union(sketchA);
union.union(sketchB);
final Sketch unionAB = union.getResult(false, null);
final long thetaLongUAB = unionAB.getThetaLong();
final long thetaLongA = sketchA.getThetaLong();
final long thetaLongB = sketchB.getThetaLong();
final int countUAB = unionAB.getRetainedEntries(true);

//Check for identical data
if ((countUAB == countA) && (countUAB == countB)
&& (thetaLongUAB == thetaLongA) && (thetaLongUAB == thetaLongB)) {
if (countUAB == countA && countUAB == countB
&& thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) {
return ONES.clone();
}

Expand All @@ -105,7 +105,7 @@ public static double[] jaccard(final Sketch sketchA, final Sketch sketchB) {
*/
public static boolean exactlyEqual(final Sketch sketchA, final Sketch sketchB) {
//Corner case checks
if ((sketchA == null) || (sketchB == null)) { return false; }
if (sketchA == null || sketchB == null) { return false; }
if (sketchA == sketchB) { return true; }
if (sketchA.isEmpty() && sketchB.isEmpty()) { return true; }
if (sketchA.isEmpty() || sketchB.isEmpty()) { return false; }
Expand All @@ -116,17 +116,17 @@ public static boolean exactlyEqual(final Sketch sketchA, final Sketch sketchB) {
//Create the Union
final Union union =
SetOperation.builder().setNominalEntries(ceilingPowerOf2(countA + countB)).buildUnion();
union.update(sketchA);
union.update(sketchB);
union.union(sketchA);
union.union(sketchB);
final Sketch unionAB = union.getResult();
final long thetaLongUAB = unionAB.getThetaLong();
final long thetaLongA = sketchA.getThetaLong();
final long thetaLongB = sketchB.getThetaLong();
final int countUAB = unionAB.getRetainedEntries(true);

//Check for identical counts and thetas
if ((countUAB == countA) && (countUAB == countB)
&& (thetaLongUAB == thetaLongA) && (thetaLongUAB == thetaLongB)) {
if (countUAB == countA && countUAB == countB
&& thetaLongUAB == thetaLongA && thetaLongUAB == thetaLongB) {
return true;
}
return false;
Expand Down
44 changes: 36 additions & 8 deletions src/main/java/org/apache/datasketches/theta/Union.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,14 @@ public Family getFamily() {
return Family.UNION;
}

/**
* Gets the result of this operation as an ordered CompactSketch on the Java heap.
* This does not disturb the underlying data structure of the union.
* Therefore, it is OK to continue updating the union after this operation.
* @return the result of this operation as an ordered CompactSketch on the Java heap
*/
public abstract CompactSketch getResult();

/**
* Gets the result of this operation as a CompactSketch of the chosen form.
* This does not disturb the underlying data structure of the union.
Expand All @@ -50,14 +58,6 @@ public Family getFamily() {
*/
public abstract CompactSketch getResult(boolean dstOrdered, WritableMemory dstMem);

/**
* Gets the result of this operation as an ordered CompactSketch on the Java heap.
* This does not disturb the underlying data structure of the union.
* Therefore, it is OK to continue updating the union after this operation.
* @return the result of this operation as an ordered CompactSketch on the Java heap
*/
public abstract CompactSketch getResult();

/**
* Resets this Union. The seed remains intact, otherwise reverts back to its virgin state.
*/
Expand Down Expand Up @@ -108,6 +108,20 @@ public abstract CompactSketch union(Sketch sketchA, Sketch sketchB, boolean dstO
*
* @param sketchIn The incoming sketch.
*/
public abstract void union(Sketch sketchIn);


/**
* Perform a Union operation with <i>this</i> union and the given on-heap sketch of the Theta Family.
* This method is not valid for the older SetSketch, which was prior to Open Source (August, 2015).
*
* <p>This method can be repeatedly called.
* If the given sketch is null it is interpreted as an empty sketch.</p>
*
* @param sketchIn The incoming sketch.
* @deprecated 2.0.0. Use {@link #union(Sketch)} instead.
*/
@Deprecated
public abstract void update(Sketch sketchIn);

/**
Expand All @@ -120,6 +134,20 @@ public abstract CompactSketch union(Sketch sketchA, Sketch sketchB, boolean dstO
*
* @param mem Memory image of sketch to be merged
*/
public abstract void union(Memory mem);

/**
* Perform a Union operation with <i>this</i> union and the given Memory image of any sketch of the
* Theta Family. The input image may be from earlier versions of the Theta Compact Sketch,
* called the SetSketch (circa 2012), which was prior to Open Source and are compact and ordered.
*
* <p>This method can be repeatedly called.
* If the given sketch is null it is interpreted as an empty sketch.</p>
*
* @param mem Memory image of sketch to be merged
* @deprecated 2.0.0. Use {@link #union(Memory)} instead.
*/
@Deprecated
public abstract void update(Memory mem);

/**
Expand Down
Loading

0 comments on commit ac00afa

Please sign in to comment.