Skip to content

Commit

Permalink
tweaked memory size estimator again
Browse files Browse the repository at this point in the history
  • Loading branch information
Peter Alfonsi committed Oct 4, 2023
1 parent 230194a commit 75be197
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@

/**
* This class implements KeyLookupStore<Integer> using a roaring bitmap with a modulo applied to values.
* The modulo increases the density of values, which makes RBMs more memory-efficient. The recommended modulo is ~2^29.
* The modulo increases the density of values, which makes RBMs more memory-efficient. The recommended modulo is ~2^28.
* It also maintains a hash set of values which have had collisions. Values which haven't had collisions can be
* safely removed from the store. The fraction of collided values should be low,
* about 0.3% for a store with 10^7 values and a modulo of 2^29.
* about 0.5% for a store with 10^7 values and a modulo of 2^28.
* The store estimates its memory footprint and will stop adding more values once it reaches its memory cap.
*/
public class RBMIntKeyLookupStore implements KeyLookupStore<Integer> {
Expand Down Expand Up @@ -75,23 +75,24 @@ protected KeyStoreStats(long memSizeCapInBytes, int maxNumEntries) {
// Store statistics; built in the constructor from the memory cap and the derived maximum entry count.
protected KeyStoreStats stats;
// Roaring bitmap backing the store.
protected RoaringBitmap rbm;
// Set of values which have had collisions.
private HashSet<Integer> collidedInts;
// Memory size estimator; built from this store's modulo in the constructor.
protected RBMSizeEstimator sizeEstimator;
// Read/write lock, with its read and write views exposed as separate fields.
protected final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
protected final Lock readLock = lock.readLock();
protected final Lock writeLock = lock.writeLock();

RBMIntKeyLookupStore(int modulo, long memSizeCapInBytes) {
this.modulo = modulo;
sizeEstimator = new RBMSizeEstimator(modulo);
this.stats = new KeyStoreStats(memSizeCapInBytes, calculateMaxNumEntries(memSizeCapInBytes));
this.rbm = new RoaringBitmap();
collidedInts = new HashSet<>();

}

/**
 * Converts a memory cap into the maximum number of entries the store may hold.
 *
 * @param memSizeCapInBytes the memory cap in bytes; 0 is treated as "no cap"
 * @return {@code Integer.MAX_VALUE} when the cap is 0, otherwise the number of entries the
 *         size estimator associates with that many bytes
 */
protected int calculateMaxNumEntries(long memSizeCapInBytes) {
    if (memSizeCapInBytes == 0) {
        return Integer.MAX_VALUE;
    }
    // The estimator's coefficients depend on the modulo, so use this store's instance rather
    // than a static call.
    return sizeEstimator.getNumEntriesFromSizeInBytes(memSizeCapInBytes);
}

protected final int transform(int value) {
Expand Down Expand Up @@ -233,7 +234,7 @@ public boolean isCollision(Integer value1, Integer value2) {

/**
 * Estimates the store's current memory footprint.
 *
 * @return the estimated size of the bitmap for {@code stats.size} entries plus the estimated
 *         size of the hash set of collided values, in bytes
 */
@Override
public long getMemorySizeInBytes() {
    // The bitmap estimate is modulo-dependent, so it comes from this store's estimator instance;
    // the hash set estimate is a static helper.
    long rbmSizeInBytes = sizeEstimator.getSizeInBytes(stats.size);
    long collidedSetSizeInBytes = RBMSizeEstimator.getHashsetMemSizeInBytes(collidedInts.size());
    return rbmSizeInBytes + collidedSetSizeInBytes;
}

@Override
Expand Down
65 changes: 59 additions & 6 deletions server/src/main/java/org/opensearch/indices/RBMSizeEstimator.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,20 +41,62 @@
public class RBMSizeEstimator {
// Number of bytes in one megabyte (2^20).
public static final int BYTES_IN_MB = 1048576;
// Coefficient for the hash set memory estimate.
// NOTE(review): units are not visible from here - confirm against getHashsetMemSizeInBytes.
public static final double HASHSET_MEM_SLOPE = 6.46 * Math.pow(10, -5);
// Per-instance coefficients of the size estimate, chosen from the modulo in the constructor.
// They are instance fields (not shared constants) because they differ between modulo ranges.
protected final double slope;
protected final double bufferMultiplier;
protected final double intercept;

/**
 * Builds an estimator whose coefficients are chosen for the given modulo.
 * There is no no-arg constructor: the coefficient fields are final and must be assigned here.
 *
 * @param modulo the modulo used by the store; its coefficients come from
 *               {@link #calculateMemoryCoefficients(int)}
 */
RBMSizeEstimator(int modulo) {
    // calculateMemoryCoefficients returns {bufferMultiplier, slope, intercept}, in that order.
    double[] memValues = calculateMemoryCoefficients(modulo);
    this.bufferMultiplier = memValues[0];
    this.slope = memValues[1];
    this.intercept = memValues[2];
}

public static double[] calculateMemoryCoefficients(int modulo) {
// Sets up values to help estimate RBM size given a modulo
// Returns an array of {bufferMultiplier, slope, intercept}

public static long getSizeInBytes(int numEntries) {
double modifiedModulo;
if (modulo == 0) {
modifiedModulo = 32.0;
} else {
modifiedModulo = Math.log(modulo) / Math.log(2);
}
// we "round up" the modulo to the nearest tested value
double highCutoff = 29.001; // Floating point makes 29 not work
double mediumCutoff = 28.0;
double lowCutoff = 26.0;
double bufferMultiplier = 1.0;
double slope;
double intercept;
if (modifiedModulo > highCutoff) {
// modulo > 2^29
bufferMultiplier = 1.2;
slope = 0.637;
intercept = 3.091;
} else if (modifiedModulo > mediumCutoff) {
// 2^29 >= modulo > 2^28
slope = 0.619;
intercept = 2.993;
} else if (modifiedModulo > lowCutoff) {
// 2^28 >= modulo > 2^26
slope = 0.614;
intercept = 2.905;
} else {
slope = 0.628;
intercept = 2.603;
}
return new double[] { bufferMultiplier, slope, intercept };
}

/**
 * Estimates the memory used by a bitmap holding the given number of entries.
 * Based on a linear fit in log-log space, so that the error is minimized as a proportion rather
 * than as an absolute value. Should be within ~50% of the true value at worst, and should
 * overestimate rather than underestimate the memory usage.
 *
 * @param numEntries the number of entries in the bitmap
 * @return the estimated size in bytes
 */
public long getSizeInBytes(int numEntries) {
    // Multiply in double precision and truncate once at the end. Truncating each factor to long
    // first throws away its fractional part and pushes the estimate downward.
    return (long) (Math.pow(numEntries, slope) * Math.pow(10, intercept) * bufferMultiplier);
}

public static int getNumEntriesFromSizeInBytes(long sizeInBytes) {
public int getNumEntriesFromSizeInBytes(long sizeInBytes) {
// This function has some precision issues especially when composed with its inverse:
// numEntries = getNumEntriesFromSizeInBytes(getSizeInBytes(numEntries))
// In this case the result can be off by up to a couple percent
Expand All @@ -64,6 +106,17 @@ public static int getNumEntriesFromSizeInBytes(long sizeInBytes) {

}

/**
 * Estimates the memory used by a bitmap with the given number of entries and modulo, without
 * needing an estimator instance.
 *
 * @param numEntries the number of entries in the bitmap
 * @param modulo     the modulo used to select the coefficients
 * @return the estimated size in bytes
 */
public static long getSizeInBytesWithModulo(int numEntries, int modulo) {
    // memValues is {bufferMultiplier, slope, intercept}.
    double[] memValues = calculateMemoryCoefficients(modulo);
    // Multiply in double precision and truncate once at the end. Truncating each factor to long
    // first throws away its fractional part and pushes the estimate downward.
    return (long) (Math.pow(numEntries, memValues[1]) * Math.pow(10, memValues[2]) * memValues[0]);
}

/**
 * Inverts the size estimate: computes how many entries correspond to a given size in bytes
 * for the given modulo, without needing an estimator instance.
 *
 * @param sizeInBytes the size in bytes
 * @param modulo      the modulo used to select the coefficients
 * @return the number of entries, truncated to an int
 */
public static int getNumEntriesFromSizeInBytesWithModulo(long sizeInBytes, int modulo) {
    final double[] coefficients = calculateMemoryCoefficients(modulo);
    final double bufferMultiplier = coefficients[0];
    final double slope = coefficients[1];
    final double intercept = coefficients[2];
    // size = multiplier * 10^intercept * n^slope, solved for n
    final double scale = bufferMultiplier * Math.pow(10, intercept);
    return (int) Math.pow(sizeInBytes / scale, 1 / slope);
}


/**
 * Converts a size in megabytes to bytes.
 *
 * @param valMB the size in megabytes; may be fractional
 * @return the size in bytes, truncated to a long
 */
protected static long convertMBToBytes(double valMB) {
    final double bytes = valMB * BYTES_IN_MB;
    return (long) bytes;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,8 @@ public void testAddingDuplicates() throws Exception {
public void testMemoryCapBlocksAdd() throws Exception {
int modulo = (int) Math.pow(2, 29);
for (int maxEntries: new int[]{2342000, 1000, 100000}) {
long memSizeCapInBytes = RBMSizeEstimator.getSizeInBytes(maxEntries);
RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore((int) Math.pow(2, 29), memSizeCapInBytes);
long memSizeCapInBytes = RBMSizeEstimator.getSizeInBytesWithModulo(maxEntries, modulo);
RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(modulo, memSizeCapInBytes);
for (int j = 0; j < maxEntries + 1000; j++) {
kls.add(j);
}
Expand Down Expand Up @@ -282,4 +282,27 @@ public void testNullInputs() throws Exception {
kls.regenerateStore(newVals);
assertEquals(4, kls.getSize());
}

/**
 * Checks that a store built with each tested modulo picks up the expected size estimator
 * coefficients, and that the estimator produces a positive size estimate.
 */
public void testMemoryCapValueInitialization() {
    double[] logModulos = new double[] { 0.0, 31.2, 30, 29, 28, 13 };
    double[] expectedMultipliers = new double[] { 1.2, 1.2, 1.2, 1, 1, 1 };
    // The last slope is 0.628, matching the smallest bucket in calculateMemoryCoefficients.
    double[] expectedSlopes = new double[] { 0.637, 0.637, 0.637, 0.619, 0.614, 0.628 };
    double[] expectedIntercepts = new double[] { 3.091, 3.091, 3.091, 2.993, 2.905, 2.603 };
    long memSizeCapInBytes = 100L * RBMSizeEstimator.BYTES_IN_MB;
    double delta = 0.01;
    for (int i = 0; i < logModulos.length; i++) {
        // A log-modulo of 0 stands for "no modulo" (modulo == 0), not 2^0.
        int modulo = 0;
        if (logModulos[i] != 0) {
            // 2^31.2 exceeds the int range; the narrowing cast saturates at Integer.MAX_VALUE.
            modulo = (int) Math.pow(2, logModulos[i]);
        }
        RBMIntKeyLookupStore kls = new RBMIntKeyLookupStore(modulo, memSizeCapInBytes);
        assertEquals(kls.stats.memSizeCapInBytes, kls.getMemorySizeCapInBytes(), 1.0);
        assertEquals(expectedMultipliers[i], kls.sizeEstimator.bufferMultiplier, delta);
        assertEquals(expectedSlopes[i], kls.sizeEstimator.slope, delta);
        assertEquals(expectedIntercepts[i], kls.sizeEstimator.intercept, delta);
        // Assert on the estimate rather than printing it, so the test stays silent.
        assertTrue(kls.sizeEstimator.getSizeInBytes(1000000) > 0);
    }
}
}

0 comments on commit 75be197

Please sign in to comment.