Skip to content

Commit

Permalink
Revert "Experiment: again try a lighter impl for encoder node cache (Jelly-RDF#212)"
Browse files Browse the repository at this point in the history

Of course, this doesn't work. I have no idea how the default hashmaps
can be so good, but I bow to the ancient masters of Java. I give up.

This reverts commit c2bad54.
  • Loading branch information
Ostrzyciel committed Nov 8, 2024
1 parent c2bad54 commit 2d428d8
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 116 deletions.
27 changes: 0 additions & 27 deletions core/src/main/java/eu/ostrzyciel/jelly/core/EncoderNodeCache.java

This file was deleted.

This file was deleted.

This file was deleted.

52 changes: 43 additions & 9 deletions core/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,40 @@
* @param <TNode> The type of RDF nodes used by the RDF library.
*/
public final class NodeEncoder<TNode> {
/**
* A cached node that depends on other lookups (RdfIri and RdfLiteral in the datatype variant).
*/
/**
 * A cached node that depends on other lookups (RdfIri and RdfLiteral in the datatype variant).
 * The cached encoding is only usable while the lookup entries it points at are still live;
 * the serial numbers stored here are compared against the lookup tables' serials to detect
 * that an entry was evicted and re-used since this node was cached.
 */
static final class DependentNode {
// The actual cached node; null until this node has been encoded at least once.
public UniversalTerm encoded;
// 1: datatypes and IRI names
// The pointer is the index in the lookup table, the serial is the serial number of the entry.
// The serial in the lookup table must be equal to the serial here for the entry to be valid.
public int lookupPointer1;
public int lookupSerial1;
// 2: IRI prefixes
// Same pointer/serial validity rule as above, but against the prefix lookup table.
public int lookupPointer2;
public int lookupSerial2;
}

/**
* A simple LRU cache for already encoded nodes.
* @param <K> Key type
* @param <V> Value type
*/
/**
 * A simple LRU cache for already encoded nodes.
 * <p>
 * Not thread-safe (same as the enclosing encoder).
 * @param <K> Key type
 * @param <V> Value type
 */
private static final class NodeCache<K, V> extends LinkedHashMap<K, V> {
    private final int maxSize;

    public NodeCache(int maxSize) {
        // accessOrder = true makes iteration (and thus removeEldestEntry's notion of
        // "eldest") follow least-recently-accessed order, giving true LRU eviction.
        // The default LinkedHashMap constructor keeps insertion order, which would
        // make this a FIFO cache despite the class javadoc.
        // Capacity is pre-sized so the map never rehashes while below maxSize entries.
        super(maxSize * 4 / 3 + 1, 0.75f, true);
        this.maxSize = maxSize;
    }

    @Override
    protected boolean removeEldestEntry(java.util.Map.Entry<K, V> eldest) {
        // Evict the least-recently-used entry once the cache exceeds its budget.
        return size() > maxSize;
    }
}

private final int maxPrefixTableSize;
private int lastIriNameId;
private int lastIriPrefixId = -1000;
Expand All @@ -29,9 +63,9 @@ public final class NodeEncoder<TNode> {

// We split the node caches in three – the first two are for nodes that depend on the lookups
// (IRIs and datatype literals). The third one is for nodes that don't depend on the lookups.
private final EncoderNodeCacheDependent iriNodeCache;
private final EncoderNodeCacheDependent dtLiteralNodeCache;
private final EncoderNodeCacheSimple nodeCache;
private final NodeCache<Object, DependentNode> iriNodeCache;
private final NodeCache<Object, DependentNode> dtLiteralNodeCache;
private final NodeCache<Object, UniversalTerm> nodeCache;

// Pre-allocated IRI that has prefixId=0 and nameId=0
static final RdfIri zeroIri = new RdfIri(0, 0);
Expand All @@ -50,7 +84,7 @@ public NodeEncoder(RdfStreamOptions opt, int nodeCacheSize, int iriNodeCacheSize
this.maxPrefixTableSize = opt.maxPrefixTableSize();
if (maxPrefixTableSize > 0) {
prefixLookup = new EncoderLookup(maxPrefixTableSize, true);
iriNodeCache = new EncoderNodeCacheDependent(iriNodeCacheSize);
iriNodeCache = new NodeCache<>(iriNodeCacheSize);
} else {
prefixLookup = null;
iriNodeCache = null;
Expand All @@ -59,9 +93,9 @@ public NodeEncoder(RdfStreamOptions opt, int nodeCacheSize, int iriNodeCacheSize
for (int i = 0; i < nameOnlyIris.length; i++) {
nameOnlyIris[i] = new RdfIri(0, i);
}
dtLiteralNodeCache = new EncoderNodeCacheDependent(dtLiteralNodeCacheSize);
dtLiteralNodeCache = new NodeCache<>(dtLiteralNodeCacheSize);
nameLookup = new EncoderLookup(opt.maxNameTableSize(), maxPrefixTableSize > 0);
nodeCache = new EncoderNodeCacheSimple(nodeCacheSize);
nodeCache = new NodeCache<>(nodeCacheSize);
}

/**
Expand All @@ -75,7 +109,7 @@ public NodeEncoder(RdfStreamOptions opt, int nodeCacheSize, int iriNodeCacheSize
public UniversalTerm encodeDtLiteral(
TNode key, String lex, String datatypeName, ArrayBuffer<RdfStreamRow> rowsBuffer
) {
var cachedNode = dtLiteralNodeCache.getOrClearIfAbsent(key);
var cachedNode = dtLiteralNodeCache.computeIfAbsent(key, k -> new DependentNode());
// Check if the value is still valid
if (cachedNode.encoded != null &&
cachedNode.lookupSerial1 == datatypeLookup.serials[cachedNode.lookupPointer1]
Expand Down Expand Up @@ -127,7 +161,7 @@ public UniversalTerm encodeIri(String iri, ArrayBuffer<RdfStreamRow> rowsBuffer)
}

// Slow path, with splitting out the prefix
var cachedNode = iriNodeCache.getOrClearIfAbsent(iri);
var cachedNode = iriNodeCache.computeIfAbsent(iri, k -> new DependentNode());
// Check if the value is still valid
if (cachedNode.encoded != null &&
cachedNode.lookupSerial1 == nameLookup.serials[cachedNode.lookupPointer1] &&
Expand Down Expand Up @@ -212,6 +246,6 @@ private UniversalTerm outputIri(DependentNode cachedNode) {
* @return The encoded node
*/
public UniversalTerm encodeOther(Object key, Function<Object, UniversalTerm> encoder) {
return nodeCache.getOrComputeIfAbsent(key, encoder);
return nodeCache.computeIfAbsent(key, encoder);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -144,11 +144,10 @@ abstract class ProtoEncoder[TNode, -TTriple, -TQuad, -TQuoted](val options: RdfS
private val extraRowsBuffer = new ArrayBuffer[RdfStreamRow](32)
private val nodeEncoder = new NodeEncoder[TNode](
options,
// Make the node cache size between 256 and 2048, depending on the user's maxNameTableSize.
Math.max(Math.min(options.maxNameTableSize, 2048), 512),
// IRI cache can be the largest...
Math.max(Math.min((options.maxNameTableSize * 1.5).toInt, 8192), 512),
Math.max(Math.min(options.maxNameTableSize, 2048), 512),
// Make the node cache size between 256 and 1024, depending on the user's maxNameTableSize.
Math.max(Math.min(options.maxNameTableSize, 1024), 256),
options.maxNameTableSize,
Math.max(Math.min(options.maxNameTableSize, 1024), 256),
)
private var emittedOptions = false

Expand Down

0 comments on commit 2d428d8

Please sign in to comment.