Skip to content

Commit

Permalink
Experiment: again try a lighter impl for encoder node cache (#212)
Browse files Browse the repository at this point in the history
  • Loading branch information
Ostrzyciel authored Nov 8, 2024
1 parent b64515a commit c2bad54
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 47 deletions.
27 changes: 27 additions & 0 deletions core/src/main/java/eu/ostrzyciel/jelly/core/EncoderNodeCache.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package eu.ostrzyciel.jelly.core;

/**
 * A terrifyingly simple cache: a fixed-size table of keys whose length is always a power of two,
 * so that a key's slot can be derived from its hash code with a single bit mask.
 * <p>
 * This base class only owns the key table and the slot computation. Subclasses keep a parallel
 * array of values of type {@code V} and decide what happens on a hit or a miss.
 *
 * @param <V> the type of the values stored by the concrete subclass
 */
abstract class EncoderNodeCache<V> {

    /** The largest power of two that fits in a positive {@code int}, i.e. the largest table size. */
    private static final int MAX_SIZE = 1 << 30;

    /** The key stored in each slot, or {@code null} if the slot has never been written. */
    protected final Object[] keys;

    /** Table size minus one; because the size is a power of two, this doubles as the index mask. */
    protected final int sizeMinusOne;

    /**
     * Allocates the key table, rounding the requested size up to the next power of two.
     *
     * @param minimumSize the minimum number of slots; values below 1 are treated as 1 so that
     *                    the table always has at least one usable slot
     * @throws IllegalArgumentException if {@code minimumSize} exceeds 2^30 and therefore cannot
     *                                  be rounded up to a power of two without overflowing
     */
    protected EncoderNodeCache(int minimumSize) {
        if (minimumSize > MAX_SIZE) {
            // Rounding up would overflow to a negative array size.
            throw new IllegalArgumentException(
                "minimumSize must not exceed " + MAX_SIZE + ", got " + minimumSize
            );
        }
        // Zero or negative requests would otherwise produce an empty table and a mask of -1,
        // making every lookup index out of bounds.
        final int requested = Math.max(minimumSize, 1);
        var size = Integer.highestOneBit(requested);
        if (size < requested) {
            size <<= 1;
        }
        this.sizeMinusOne = size - 1;
        keys = new Object[size];
    }

    /**
     * Computes the slot for the given key.
     *
     * @param key the key to locate; must not be {@code null}
     * @return an index in the range {@code [0, keys.length)}
     */
    protected int calcIndex(Object key) {
        int h = key.hashCode();
        // Spread bits to avoid collisions for hashes that differ only in the upper bits.
        // Trick from HashMap.hash()
        return (h ^ (h >>> 16)) & sizeMinusOne;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package eu.ostrzyciel.jelly.core;

import eu.ostrzyciel.jelly.core.proto.v1.UniversalTerm;

/**
 * A cached node that depends on other lookups (RdfIri and RdfLiteral in the datatype variant).
 * <p>
 * This is a plain mutable holder with package-private fields. It carries the encoded term
 * together with two (pointer, serial) pairs that refer to entries in lookup tables.
 */
final class DependentNode {
    // The actual cached node. A null value means there is nothing cached in this holder.
    UniversalTerm encoded;
    // 1: datatypes and IRI names
    // The pointer is the index in the lookup table, the serial is the serial number of the entry.
    // The serial in the lookup table must be equal to the serial here for the entry to be valid.
    int lookupPointer1;
    int lookupSerial1;
    // 2: IRI prefixes
    // Same (pointer, serial) scheme as above, for the second lookup table.
    int lookupPointer2;
    int lookupSerial2;
}

/**
 * A cache whose slots each hold one pre-allocated, reusable {@link DependentNode}.
 * <p>
 * The holders are created once in the constructor and never replaced; a miss only resets the
 * holder that already lives in the slot and re-labels the slot with the new key.
 */
class EncoderNodeCacheDependent extends EncoderNodeCache<DependentNode> {
    private final DependentNode[] values;

    /**
     * Creates the cache and fills every slot with an empty holder.
     *
     * @param minimumSize the minimum number of slots, rounded up by the superclass
     */
    EncoderNodeCacheDependent(int minimumSize) {
        super(minimumSize);
        final int capacity = sizeMinusOne + 1;
        values = new DependentNode[capacity];
        int slot = 0;
        while (slot < capacity) {
            values[slot] = new DependentNode();
            slot++;
        }
    }

    /**
     * Returns the holder stored in the slot of the given key.
     * <p>
     * If the slot is currently labelled with an equal key, the holder is returned untouched.
     * Otherwise the slot is taken over by {@code key}: the holder's encoded term is set to
     * {@code null} and both lookup pointers are set to zero before it is returned. The lookup
     * serials are left as they were.
     *
     * @param key the key to look up; must not be {@code null}
     * @return the holder occupying the key's slot, never {@code null}
     */
    DependentNode getOrClearIfAbsent(Object key) {
        final int slot = calcIndex(key);
        final Object occupant = keys[slot];
        final DependentNode holder = values[slot];
        final boolean hit = occupant != null && (occupant == key || occupant.equals(key));
        if (!hit) {
            // Take over the slot: relabel it and wipe the state left by the previous key.
            keys[slot] = key;
            holder.encoded = null;
            holder.lookupPointer1 = 0;
            holder.lookupPointer2 = 0;
        }
        return holder;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package eu.ostrzyciel.jelly.core;

import eu.ostrzyciel.jelly.core.proto.v1.UniversalTerm;

import java.util.function.Function;

/**
 * A cache that maps each slot's key directly to an encoded {@link UniversalTerm}.
 * <p>
 * A key that hashes to an occupied slot simply overwrites the previous occupant.
 */
class EncoderNodeCacheSimple extends EncoderNodeCache<UniversalTerm> {
    private final UniversalTerm[] values;

    /**
     * Creates the cache with an empty value table matching the key table of the superclass.
     *
     * @param minimumSize the minimum number of slots, rounded up by the superclass
     */
    EncoderNodeCacheSimple(int minimumSize) {
        super(minimumSize);
        values = new UniversalTerm[sizeMinusOne + 1];
    }

    /**
     * Returns the cached term for {@code key}, computing and storing it on a miss.
     *
     * @param key the key to look up; must not be {@code null}
     * @param f   the function used to encode {@code key} when it is not cached
     * @return the cached or freshly computed term
     */
    UniversalTerm getOrComputeIfAbsent(Object key, Function<Object, UniversalTerm> f) {
        final int idx = calcIndex(key);
        final Object storedKey = keys[idx];
        if (storedKey != null && (storedKey == key || storedKey.equals(key))) {
            return values[idx];
        }
        // Compute BEFORE touching the slot. If f throws, the slot keeps its previous, consistent
        // (key, value) pair instead of pairing the new key with the old key's term. If f re-enters
        // this cache and lands on the same slot, the pair written below still belongs together.
        final UniversalTerm newTerm = f.apply(key);
        keys[idx] = key;
        values[idx] = newTerm;
        return newTerm;
    }
}
52 changes: 9 additions & 43 deletions core/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,40 +19,6 @@
* @param <TNode> The type of RDF nodes used by the RDF library.
*/
public final class NodeEncoder<TNode> {
/**
 * A cached node that depends on other lookups (RdfIri and RdfLiteral in the datatype variant).
 * <p>
 * This is a plain mutable holder: the encoded term plus two (pointer, serial) pairs that refer
 * to entries in lookup tables.
 */
static final class DependentNode {
    // The actual cached node
    public UniversalTerm encoded;
    // 1: datatypes and IRI names
    // The pointer is the index in the lookup table, the serial is the serial number of the entry.
    // The serial in the lookup table must be equal to the serial here for the entry to be valid.
    public int lookupPointer1;
    public int lookupSerial1;
    // 2: IRI prefixes
    // Same (pointer, serial) scheme as above, for the second lookup table.
    public int lookupPointer2;
    public int lookupSerial2;
}

/**
 * A simple size-bounded cache for already encoded nodes.
 * <p>
 * NOTE(review): this was described as an LRU cache, but the constructor relies on the implicit
 * no-argument {@code LinkedHashMap} constructor, which keeps insertion order rather than access
 * order. The entry evicted on overflow is therefore the one inserted first, not the one used
 * least recently.
 * @param <K> Key type
 * @param <V> Value type
 */
private static final class NodeCache<K, V> extends LinkedHashMap<K, V> {
    // Maximum number of entries kept in the map.
    private final int maxSize;

    /**
     * @param maxSize the number of entries above which the eldest entry is evicted
     */
    public NodeCache(int maxSize) {
        this.maxSize = maxSize;
    }

    /**
     * Tells the map to drop its eldest entry once the size has grown past {@code maxSize}.
     */
    @Override
    protected boolean removeEldestEntry(java.util.Map.Entry<K, V> eldest) {
        return size() > maxSize;
    }
}

private final int maxPrefixTableSize;
private int lastIriNameId;
private int lastIriPrefixId = -1000;
Expand All @@ -63,9 +29,9 @@ protected boolean removeEldestEntry(java.util.Map.Entry<K, V> eldest) {

// We split the node caches in three – the first two are for nodes that depend on the lookups
// (IRIs and datatype literals). The third one is for nodes that don't depend on the lookups.
private final NodeCache<Object, DependentNode> iriNodeCache;
private final NodeCache<Object, DependentNode> dtLiteralNodeCache;
private final NodeCache<Object, UniversalTerm> nodeCache;
private final EncoderNodeCacheDependent iriNodeCache;
private final EncoderNodeCacheDependent dtLiteralNodeCache;
private final EncoderNodeCacheSimple nodeCache;

// Pre-allocated IRI that has prefixId=0 and nameId=0
static final RdfIri zeroIri = new RdfIri(0, 0);
Expand All @@ -84,7 +50,7 @@ public NodeEncoder(RdfStreamOptions opt, int nodeCacheSize, int iriNodeCacheSize
this.maxPrefixTableSize = opt.maxPrefixTableSize();
if (maxPrefixTableSize > 0) {
prefixLookup = new EncoderLookup(maxPrefixTableSize, true);
iriNodeCache = new NodeCache<>(iriNodeCacheSize);
iriNodeCache = new EncoderNodeCacheDependent(iriNodeCacheSize);
} else {
prefixLookup = null;
iriNodeCache = null;
Expand All @@ -93,9 +59,9 @@ public NodeEncoder(RdfStreamOptions opt, int nodeCacheSize, int iriNodeCacheSize
for (int i = 0; i < nameOnlyIris.length; i++) {
nameOnlyIris[i] = new RdfIri(0, i);
}
dtLiteralNodeCache = new NodeCache<>(dtLiteralNodeCacheSize);
dtLiteralNodeCache = new EncoderNodeCacheDependent(dtLiteralNodeCacheSize);
nameLookup = new EncoderLookup(opt.maxNameTableSize(), maxPrefixTableSize > 0);
nodeCache = new NodeCache<>(nodeCacheSize);
nodeCache = new EncoderNodeCacheSimple(nodeCacheSize);
}

/**
Expand All @@ -109,7 +75,7 @@ public NodeEncoder(RdfStreamOptions opt, int nodeCacheSize, int iriNodeCacheSize
public UniversalTerm encodeDtLiteral(
TNode key, String lex, String datatypeName, ArrayBuffer<RdfStreamRow> rowsBuffer
) {
var cachedNode = dtLiteralNodeCache.computeIfAbsent(key, k -> new DependentNode());
var cachedNode = dtLiteralNodeCache.getOrClearIfAbsent(key);
// Check if the value is still valid
if (cachedNode.encoded != null &&
cachedNode.lookupSerial1 == datatypeLookup.serials[cachedNode.lookupPointer1]
Expand Down Expand Up @@ -161,7 +127,7 @@ public UniversalTerm encodeIri(String iri, ArrayBuffer<RdfStreamRow> rowsBuffer)
}

// Slow path, with splitting out the prefix
var cachedNode = iriNodeCache.computeIfAbsent(iri, k -> new DependentNode());
var cachedNode = iriNodeCache.getOrClearIfAbsent(iri);
// Check if the value is still valid
if (cachedNode.encoded != null &&
cachedNode.lookupSerial1 == nameLookup.serials[cachedNode.lookupPointer1] &&
Expand Down Expand Up @@ -246,6 +212,6 @@ private UniversalTerm outputIri(DependentNode cachedNode) {
* @return The encoded node
*/
public UniversalTerm encodeOther(Object key, Function<Object, UniversalTerm> encoder) {
return nodeCache.computeIfAbsent(key, encoder);
return nodeCache.getOrComputeIfAbsent(key, encoder);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -144,10 +144,11 @@ abstract class ProtoEncoder[TNode, -TTriple, -TQuad, -TQuoted](val options: RdfS
private val extraRowsBuffer = new ArrayBuffer[RdfStreamRow](32)
private val nodeEncoder = new NodeEncoder[TNode](
options,
// Make the node cache size between 256 and 1024, depending on the user's maxNameTableSize.
Math.max(Math.min(options.maxNameTableSize, 1024), 256),
options.maxNameTableSize,
Math.max(Math.min(options.maxNameTableSize, 1024), 256),
// Make the node cache size between 512 and 2048, depending on the user's maxNameTableSize.
Math.max(Math.min(options.maxNameTableSize, 2048), 512),
// IRI cache can be the largest...
Math.max(Math.min((options.maxNameTableSize * 1.5).toInt, 8192), 512),
Math.max(Math.min(options.maxNameTableSize, 2048), 512),
)
private var emittedOptions = false

Expand Down

0 comments on commit c2bad54

Please sign in to comment.