Skip to content

Commit

Permalink
Merge pull request #252 from DataSketches/NewEmpty
Browse files Browse the repository at this point in the history
New empty
  • Loading branch information
leerho authored Apr 2, 2019
2 parents bd02cc3 + 65da0ff commit cec007a
Show file tree
Hide file tree
Showing 36 changed files with 927 additions and 666 deletions.
16 changes: 10 additions & 6 deletions src/main/java/com/yahoo/sketches/quantiles/DoublesSketch.java
Original file line number Diff line number Diff line change
Expand Up @@ -128,23 +128,27 @@ public abstract class DoublesSketch {
static final int MIN_K = 2;
static final int MAX_K = 1 << 15;

/**
* Parameter that controls space usage of sketch and accuracy of estimates.
*/
final int k_;

/**
* Setting the seed makes the results of the sketch deterministic if the input values are
* received in exactly the same order. This is only useful when performing test comparisons,
* otherwise is not recommended.
*/
public static Random rand = new Random();
static Random rand = new Random();

/**
* Parameter that controls space usage of sketch and accuracy of estimates.
*/
final int k_;

/**
 * Package-private constructor that records the configured value of <i>k</i>.
 *
 * <p>The value is handed to {@code Util.checkK} before it is stored in {@code k_}
 * (presumably this rejects values outside the supported range; confirm against
 * {@code Util.checkK}).
 *
 * @param k the parameter that controls space usage of the sketch and accuracy of estimates
 */
DoublesSketch(final int k) {
Util.checkK(k);
k_ = k;
}

/**
 * Replaces the shared {@code rand} instance with a new {@link Random} created from the
 * given seed.
 *
 * @param seed the seed passed to the new {@code Random}
 */
static synchronized void setRandom(final long seed) {
  final Random seeded = new Random(seed);
  DoublesSketch.rand = seeded;
}

/**
* Returns a new builder
* @return a new builder
Expand Down
41 changes: 34 additions & 7 deletions src/main/java/com/yahoo/sketches/theta/AnotB.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
* <p>Calling the update function a second time essentially clears the internal state and updates
* with the new pair of sketches.
*
* <p>As an alternative, one can use the aNotB method that returns the result immediately.
*
* @author Lee Rhodes
*/
public abstract class AnotB extends SetOperation {
Expand All @@ -30,10 +32,16 @@ public Family getFamily() {
return Family.A_NOT_B;
}

/**
* Gets the result of this operation as an ordered CompactSketch on the Java heap
* @return the result of this operation as an ordered CompactSketch on the Java heap
*/
public abstract CompactSketch getResult();

/**
* Gets the result of this set operation as a CompactSketch of the chosen form
* @param dstOrdered
* <a href="{@docRoot}/resources/dictionary.html#dstOrdered">See Destination Ordered</a>
* <a href="{@docRoot}/resources/dictionary.html#dstOrdered">See Destination Ordered</a>.
*
* @param dstMem
* <a href="{@docRoot}/resources/dictionary.html#dstMem">See Destination Memory</a>.
Expand All @@ -42,12 +50,6 @@ public Family getFamily() {
*/
public abstract CompactSketch getResult(boolean dstOrdered, WritableMemory dstMem);

/**
* Gets the result of this operation as an ordered CompactSketch on the Java heap
* @return the result of this operation as an ordered CompactSketch on the Java heap
*/
public abstract CompactSketch getResult();

/**
* Perform A-and-not-B set operation on the two given sketches.
* A null sketch is interpreted as an empty sketch.
Expand All @@ -57,4 +59,29 @@ public Family getFamily() {
*/
public abstract void update(Sketch a, Sketch b);

/**
 * Convenience form of the A-and-not-B operation: delegates to
 * {@link #aNotB(Sketch, Sketch, boolean, WritableMemory)} requesting an ordered result and
 * supplying no destination Memory.
 * @param a The incoming sketch for the first argument
 * @param b The incoming sketch for the second argument
 * @return an ordered CompactSketch on the heap
 */
public CompactSketch aNotB(final Sketch a, final Sketch b) {
  final boolean dstOrdered = true;     //always request the ordered form
  final WritableMemory dstMem = null;  //no destination Memory is supplied
  return aNotB(a, b, dstOrdered, dstMem);
}

/**
* Perform A-and-not-B set operation on the two given sketches and return the result as a
* CompactSketch.
* @param a The incoming sketch for the first argument
* @param b The incoming sketch for the second argument
* @param dstOrdered
* <a href="{@docRoot}/resources/dictionary.html#dstOrdered">See Destination Ordered</a>.
* @param dstMem
* <a href="{@docRoot}/resources/dictionary.html#dstMem">See Destination Memory</a>.
* @return the result as a CompactSketch.
*/
public abstract CompactSketch aNotB(Sketch a, Sketch b, boolean dstOrdered,
WritableMemory dstMem);

}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@

package com.yahoo.sketches.theta;

import static com.yahoo.sketches.theta.UpdateReturnState.InsertedCountIncremented;
import static com.yahoo.sketches.theta.UpdateReturnState.ConcurrentBufferInserted;
import static com.yahoo.sketches.theta.UpdateReturnState.ConcurrentPropagated;
import static com.yahoo.sketches.theta.UpdateReturnState.RejectedOverTheta;

import java.util.concurrent.atomic.AtomicBoolean;
Expand All @@ -27,11 +28,6 @@
*/
final class ConcurrentHeapThetaBuffer extends HeapQuickSelectSketch {

/**
 * Computes the log-size value for a local buffer as the smaller of {@code lgNomLongs} and
 * {@code (int) Math.log(Math.sqrt(exactSize) / (2 * maxNumLocalBuffers))}.
 *
 * <p>The cast to {@code int} truncates the logarithm toward zero.
 * NOTE(review): {@code Math.log} is the natural logarithm; if a base-2 value was intended
 * for a "log buffer size", this needs confirming with the author.
 *
 * @param lgNomLongs upper bound on the returned value
 * @param exactSize value whose square root forms the numerator of the ratio
 * @param maxNumLocalBuffers doubled to form the denominator of the ratio
 * @return the smaller of {@code lgNomLongs} and the truncated logarithm of the ratio
 */
private static int computeLogBufferSize(final int lgNomLongs, final long exactSize,
final int maxNumLocalBuffers) {
return Math.min(lgNomLongs, (int)Math.log(Math.sqrt(exactSize) / (2 * maxNumLocalBuffers)));
}

// Shared sketch consisting of the global sample set and theta value.
private final ConcurrentSharedThetaSketch shared;

Expand Down Expand Up @@ -60,6 +56,11 @@ private static int computeLogBufferSize(final int lgNomLongs, final long exactSi
localPropagationInProgress = new AtomicBoolean(false);
}

/**
 * Derives the log-size value for a local buffer. The result is capped at {@code lgNomLongs};
 * below that cap it is the truncated {@code Math.log} of
 * {@code sqrt(exactSize) / (2 * maxNumLocalBuffers)}.
 *
 * @param lgNomLongs upper bound on the returned value
 * @param exactSize value whose square root forms the numerator of the ratio
 * @param maxNumLocalBuffers doubled to form the denominator of the ratio
 * @return the smaller of {@code lgNomLongs} and the truncated logarithm of the ratio
 */
private static int computeLogBufferSize(final int lgNomLongs, final long exactSize,
    final int maxNumLocalBuffers) {
  final int denominator = 2 * maxNumLocalBuffers; //int arithmetic, as in the ratio below
  final double ratio = Math.sqrt(exactSize) / denominator;
  final int lgFromRatio = (int) Math.log(ratio);  //truncates toward zero
  return (lgNomLongs <= lgFromRatio) ? lgNomLongs : lgFromRatio;
}

//Sketch overrides

@Override
Expand All @@ -82,6 +83,11 @@ public double getUpperBound(final int numStdDev) {
return shared.getUpperBound(numStdDev);
}

/**
 * Reports whether the shared sketch has Memory; the answer is taken directly from
 * {@code shared.hasMemory()}.
 *
 * @return the value returned by the shared sketch's {@code hasMemory()}
 */
@Override
public boolean hasMemory() {
  final boolean sharedHasMemory = shared.hasMemory();
  return sharedHasMemory;
}

@Override
public boolean isDirect() {
return shared.isDirect();
Expand Down Expand Up @@ -129,18 +135,21 @@ UpdateReturnState hashUpdate(final long hash) {
}
HashOperations.checkHashCorruption(hash);
if ((getHashTableThreshold() == 0) || isExactMode ) {
final long thetaLong = getThetaLong();
//The over-theta and zero test
if (HashOperations.continueCondition(thetaLong, hash)) {
if (HashOperations.continueCondition(getThetaLong(), hash)) {
return RejectedOverTheta; //signal that hash was rejected due to theta or zero.
}
if (propagateToSharedSketch(hash)) {
return InsertedCountIncremented; //not totally correct
return ConcurrentPropagated;
}
}
final UpdateReturnState state = super.hashUpdate(hash);
if (isOutOfSpace(getRetainedEntries() + 1)) {
propagateToSharedSketch();
return ConcurrentPropagated;
}
if (state == UpdateReturnState.InsertedCountIncremented) {
return ConcurrentBufferInserted;
}
return state;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ boolean propagate(final AtomicBoolean localPropagationInProgress, final Sketch s

double getUpperBound(int numStdDev);

boolean hasMemory();

boolean isDirect();

boolean isEmpty();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,12 @@ static DirectCompactOrderedSketch wrapInstance(final Memory srcMem, final long s
* @param dstMem the given destination Memory. This clears it before use.
* @return a DirectCompactOrderedSketch.
*/
static DirectCompactOrderedSketch compact(final UpdateSketch sketch,
final WritableMemory dstMem) {
final long thetaLong = sketch.getThetaLong();
final boolean empty = sketch.isEmpty();
static DirectCompactOrderedSketch compact(final UpdateSketch sketch, final WritableMemory dstMem) {
final int curCount = sketch.getRetainedEntries(true);
long thetaLong = sketch.getThetaLong();
boolean empty = sketch.isEmpty();
thetaLong = thetaOnCompact(empty, curCount, thetaLong);
empty = emptyOnCompact(curCount, thetaLong);
final int preLongs = computeCompactPreLongs(thetaLong, empty, curCount);
final short seedHash = sketch.getSeedHash();
final long[] cache = sketch.getCache();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,11 @@ static DirectCompactUnorderedSketch wrapInstance(final Memory srcMem, final long
*/
static DirectCompactUnorderedSketch compact(final UpdateSketch sketch,
final WritableMemory dstMem) {
final long thetaLong = sketch.getThetaLong();
final boolean empty = sketch.isEmpty();
final int curCount = sketch.getRetainedEntries(true);
long thetaLong = sketch.getThetaLong();
boolean empty = sketch.isEmpty();
thetaLong = thetaOnCompact(empty, curCount, thetaLong);
empty = emptyOnCompact(curCount, thetaLong);
final int preLongs = computeCompactPreLongs(thetaLong, empty, curCount);
final short seedHash = sketch.getSeedHash();
final long[] cache = sketch.getCache();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import static com.yahoo.sketches.theta.PreambleUtil.insertSeedHash;
import static com.yahoo.sketches.theta.PreambleUtil.insertSerVer;
import static com.yahoo.sketches.theta.PreambleUtil.insertThetaLong;
import static com.yahoo.sketches.theta.PreambleUtil.insertUnionThetaLong;
import static com.yahoo.sketches.theta.Rebuilder.actLgResizeFactor;
import static com.yahoo.sketches.theta.Rebuilder.moveAndResize;
import static com.yahoo.sketches.theta.Rebuilder.quickSelectAndRebuild;
Expand Down Expand Up @@ -132,6 +133,9 @@ private DirectQuickSelectSketch(
insertP(dstMem, p); //bytes 12-15
final long thetaLong = (long)(p * MAX_THETA_LONG_AS_DOUBLE);
insertThetaLong(dstMem, thetaLong); //bytes 16-23
if (unionGadget) {
insertUnionThetaLong(dstMem, thetaLong);
}
//@formatter:on

//clear hash table area
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ static final CompactSketch heapify1to3(final Memory srcMem, final long seed) {

final long[] compactOrderedCache = new long[curCount];
srcMem.getLongArray(24, compactOrderedCache, 0, curCount);

return HeapCompactOrderedSketch
.compact(compactOrderedCache, false, seedHash, curCount, thetaLong);
}
Expand Down Expand Up @@ -93,11 +92,10 @@ static final CompactSketch heapify2to3(final Memory srcMem, final long seed) {
validateInputSize(reqBytesIn, memCap);

final long thetaLong = (mdLongs < 3) ? Long.MAX_VALUE : srcMem.getLong(THETA_LONG);
final boolean empty = (srcMem.getByte(FLAGS_BYTE) & EMPTY_FLAG_MASK) != 0;

boolean empty = (srcMem.getByte(FLAGS_BYTE) & EMPTY_FLAG_MASK) != 0;
empty = (curCount == 0) && (thetaLong == Long.MAX_VALUE); //force true
final long[] compactOrderedCache = new long[curCount];
srcMem.getLongArray(mdLongs << 3, compactOrderedCache, 0, curCount);

return HeapCompactOrderedSketch
.compact(compactOrderedCache, empty, seedHash, curCount, thetaLong);
}
Expand Down
Loading

0 comments on commit cec007a

Please sign in to comment.