Skip to content

Commit

Permalink
Encoder: maybe speed up the node cache (2)
Browse files Browse the repository at this point in the history
For jelly-big I saw similar performance, but for jelly-small much worse...

Here I've increased the node cache size significantly (hey, it's just an array of pointers, what does that cost in the era of Google Chrome) and made optimized implementations for both caches to avoid doing useless operations.
  • Loading branch information
Ostrzyciel committed Oct 22, 2024
1 parent 31e19e1 commit dbd2395
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 55 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package eu.ostrzyciel.jelly.core;

import eu.ostrzyciel.jelly.core.proto.v1.UniversalTerm;

import java.util.function.Function;

/**
* A cached node that depends on other lookups (RdfIri and RdfLiteral in the datatype variant).
*
* Besides the encoded term, the entry records which lookup table entries it was built from,
* as (pointer, serial) pairs. All fields are public and mutable.
*/
final class DependentNode {
// The actual cached node
public UniversalTerm encoded;
// 1: datatypes and IRI names
// The pointer is the index in the lookup table, the serial is the serial number of the entry.
// The serial in the lookup table must be equal to the serial here for the entry to be valid.
public int lookupPointer1;
public int lookupSerial1;
// 2: IRI prefixes
// Same (pointer, serial) pairing as for lookup 1, presumably with the same validity rule
// (the original comment only labels this pair; confirm against NodeEncoder).
public int lookupPointer2;
public int lookupSerial2;
}

/**
 * Node cache whose values are pre-allocated, reusable {@link DependentNode} instances.
 *
 * Every slot of the value array is filled with a {@link DependentNode} at construction time,
 * so lookups never allocate: on a miss the node already sitting in the slot is reset and
 * handed back to the caller to be filled in again.
 */
final class DependentNodeCache extends EncoderNodeCache<Object, DependentNode> {
    private final DependentNode[] values;

    /**
     * Creates the cache and pre-populates every slot with an empty {@link DependentNode}.
     *
     * @param minimumSize minimum number of slots, passed to the superclass constructor
     */
    public DependentNodeCache(int minimumSize) {
        super(minimumSize);

        values = new DependentNode[sizeMinusOne + 1];
        for (int i = 0; i < values.length; i++) {
            values[i] = new DependentNode();
        }
    }

    /**
     * Returns the node stored in the slot for {@code key}.
     *
     * If the slot currently belongs to an equal key, the node is returned untouched.
     * Otherwise the slot is claimed for {@code key}: the node's {@code encoded} field is set
     * to {@code null} and both lookup pointers are zeroed before it is returned. The lookup
     * serials are left as they were.
     *
     * @param key the key to look up; must not be {@code null}
     * @return the (never {@code null}) node occupying the key's slot
     */
    public DependentNode getOrClearIfAbsent(Object key) {
        final int idx = calcIndex(key);
        final DependentNode node = values[idx];
        // A hit is decided by the key stored in the slot, not by the node itself:
        // DependentNode does not override equals(), so comparing the node to the key
        // could never match and the cache would miss on every lookup.
        final Object storedKey = keys[idx];
        if (storedKey != null && storedKey.equals(key)) {
            return node;
        }
        // Miss: recycle the pooled node for the new key.
        node.encoded = null;
        node.lookupPointer1 = 0;
        node.lookupPointer2 = 0;
        keys[idx] = key;
        return node;
    }
}
44 changes: 13 additions & 31 deletions core/src/main/java/eu/ostrzyciel/jelly/core/EncoderNodeCache.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,13 @@
/**
* A terrifyingly simple cache.
*
* TODO: modifications
*
* Code copied from Apache Jena 5.2.0:
* https://github.com/apache/jena/blob/6443abda6e2717b95b05c45515817584e93ef244/jena-base/src/main/java/org/apache/jena/atlas/lib/cache/CacheSimple.java#L40
*
* TODO: license
*
* Authors:
* - Andy Seaborne
* - A. Soroka
Expand All @@ -19,47 +23,25 @@
* @param <K>
* @param <V>
*/
final class EncoderNodeCache<K, V> {
private final V[] values;
private final K[] keys;
private final int sizeMinusOne;
// private int currentSize = 0;
abstract class EncoderNodeCache<K, V> {
protected final Object[] keys;
protected final int sizeMinusOne;

public EncoderNodeCache(int minimumSize) {
protected EncoderNodeCache(int minimumSize) {
var size = Integer.highestOneBit(minimumSize);
if (size < minimumSize) {
size <<= 1;
}
this.sizeMinusOne = size-1;
this.sizeMinusOne = size - 1;

@SuppressWarnings("unchecked")
V[] x = (V[])new Object[size];
values = x;

@SuppressWarnings("unchecked")
K[] z = (K[])new Object[size];
K[] z = (K[]) new Object[size];
keys = z;
}

private int calcIndex(K key) {
protected int calcIndex(Object key) {
return key.hashCode() & sizeMinusOne;
}

public V computeIfAbsent(K key, Function<K, V> function) {
final int idx = calcIndex(key);
final boolean isExistingKeyNotNull = keys[idx] != null;
if (isExistingKeyNotNull && keys[idx].equals(key)) {
return values[idx];
} else {
final var value = function.apply(key);
if (value != null) {
values[idx] = value;
// if (!isExistingKeyNotNull) {
// currentSize++;
// }
keys[idx] = key;
}
return value;
}
}
}


28 changes: 6 additions & 22 deletions core/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,6 @@
* @param <TNode> The type of RDF nodes used by the RDF library.
*/
public final class NodeEncoder<TNode> {
/**
* A cached node that depends on other lookups (RdfIri and RdfLiteral in the datatype variant).
*/
static final class DependentNode {
// The actual cached node
public UniversalTerm encoded;
// 1: datatypes and IRI names
// The pointer is the index in the lookup table, the serial is the serial number of the entry.
// The serial in the lookup table must be equal to the serial here for the entry to be valid.
public int lookupPointer1;
public int lookupSerial1;
// 2: IRI prefixes
public int lookupPointer2;
public int lookupSerial2;
}

private final int maxPrefixTableSize;
private int lastIriNameId;
private int lastIriPrefixId = -1000;
Expand All @@ -45,8 +29,8 @@ static final class DependentNode {

// We split the node caches in two – the first one is for nodes that depend on the lookups
// (IRIs and datatype literals). The second one is for nodes that don't depend on the lookups.
private final EncoderNodeCache<Object, DependentNode> dependentNodeCache;
private final EncoderNodeCache<Object, UniversalTerm> nodeCache;
private final DependentNodeCache dependentNodeCache;
private final OtherNodeCache nodeCache;

// Pre-allocated IRI that has prefixId=0 and nameId=0
static final RdfIri zeroIri = new RdfIri(0, 0);
Expand All @@ -64,8 +48,8 @@ public NodeEncoder(RdfStreamOptions opt, int nodeCacheSize, int dependentNodeCac
prefixLookup = new EncoderLookup(maxPrefixTableSize);
}
nameLookup = new EncoderLookup(opt.maxNameTableSize());
dependentNodeCache = new EncoderNodeCache<>(dependentNodeCacheSize);
nodeCache = new EncoderNodeCache<>(nodeCacheSize);
dependentNodeCache = new DependentNodeCache(dependentNodeCacheSize);
nodeCache = new OtherNodeCache(nodeCacheSize);
}

/**
Expand All @@ -79,7 +63,7 @@ public NodeEncoder(RdfStreamOptions opt, int nodeCacheSize, int dependentNodeCac
public UniversalTerm encodeDtLiteral(
TNode key, String lex, String datatypeName, ArrayBuffer<RdfStreamRow> rowsBuffer
) {
var cachedNode = dependentNodeCache.computeIfAbsent(key, k -> new DependentNode());
var cachedNode = dependentNodeCache.getOrClearIfAbsent(key);
// Check if the value is still valid
if (cachedNode.encoded != null &&
cachedNode.lookupSerial1 == datatypeLookup.table[cachedNode.lookupPointer1 * 3 + 2]
Expand Down Expand Up @@ -113,7 +97,7 @@ public UniversalTerm encodeDtLiteral(
* @return The encoded IRI
*/
public UniversalTerm encodeIri(String iri, ArrayBuffer<RdfStreamRow> rowsBuffer) {
var cachedNode = dependentNodeCache.computeIfAbsent(iri, k -> new DependentNode());
var cachedNode = dependentNodeCache.getOrClearIfAbsent(iri);
// Check if the value is still valid
if (cachedNode.encoded != null &&
cachedNode.lookupSerial1 == nameLookup.table[cachedNode.lookupPointer1 * 3 + 2]
Expand Down
27 changes: 27 additions & 0 deletions core/src/main/java/eu/ostrzyciel/jelly/core/OtherNodeCache.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package eu.ostrzyciel.jelly.core;

import eu.ostrzyciel.jelly.core.proto.v1.*;
import java.util.function.Function;

/**
 * Node cache that stores {@link UniversalTerm} values in an array parallel to the
 * key array kept by the superclass.
 */
public class OtherNodeCache extends EncoderNodeCache<Object, UniversalTerm> {
    private final UniversalTerm[] values;

    /**
     * Creates the cache with one value slot per key slot.
     *
     * @param minimumSize minimum number of slots, passed to the superclass constructor
     */
    public OtherNodeCache(int minimumSize) {
        super(minimumSize);
        this.values = new UniversalTerm[sizeMinusOne + 1];
    }

    /**
     * Returns the value cached for {@code key}, computing and storing it on a miss.
     *
     * On a miss the result of {@code function} is written to the slot together with the key,
     * whatever that result is (a {@code null} result is stored as well), replacing any
     * previous occupant of the slot.
     *
     * @param key the key to look up; must not be {@code null}
     * @param function computes the value for a key that is not currently cached
     * @return the cached or freshly computed value
     */
    public UniversalTerm computeIfAbsent(Object key, Function<Object, UniversalTerm> function) {
        final int slot = calcIndex(key);
        final Object resident = keys[slot];
        if (resident == null || !resident.equals(key)) {
            // Miss: compute first, then overwrite the slot.
            final UniversalTerm computed = function.apply(key);
            values[slot] = computed;
            keys[slot] = key;
            return computed;
        }
        return values[slot];
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,9 @@ abstract class ProtoEncoder[TNode, -TTriple, -TQuad, -TQuoted](val options: RdfS
// We assume by default that 32 rows should be enough to encode one statement.
// If not, the buffer will grow.
private val extraRowsBuffer = new ArrayBuffer[RdfStreamRow](32)
// Make the node cache size between 512 and 4096, depending on the user's maxNameTableSize.
private val nodeCacheSize = Math.max(Math.min(options.maxNameTableSize * 2, 4096), 256)
// Make the node cache size between 4096 and 8192, depending on the user's maxNameTableSize.
// Kind of. Fix this tomorrow.
private val nodeCacheSize = Math.max(Math.min(options.maxNameTableSize * 4, 8192), 4096)
private val nodeEncoder = new NodeEncoder[TNode](options, nodeCacheSize, nodeCacheSize)
private var emittedOptions = false

Expand Down

0 comments on commit dbd2395

Please sign in to comment.