Skip to content

Commit

Permalink
Revert "Experiment: again try a lighter impl for encoder node cache (Jelly-RDF#212)"
Browse files Browse the repository at this point in the history

Of course, this doesn't work. I have no idea how the default hashmaps
can be so good, but I bow to the ancient masters of Java. I give up.

This reverts commit c2bad54.
  • Loading branch information
Ostrzyciel committed Nov 8, 2024
1 parent c2bad54 commit 2d428d8
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 116 deletions.
27 changes: 0 additions & 27 deletions core/src/main/java/eu/ostrzyciel/jelly/core/EncoderNodeCache.java

This file was deleted.

This file was deleted.

This file was deleted.

52 changes: 43 additions & 9 deletions core/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,40 @@
* @param <TNode> The type of RDF nodes used by the RDF library.
*/
public final class NodeEncoder<TNode> {
/**
* A cached node that depends on other lookups (RdfIri and RdfLiteral in the datatype variant).
*/
/**
 * A cached node that depends on other lookups (RdfIri and RdfLiteral in the datatype variant).
 * The cached encoding is only usable while the lookup entries it points at are still live;
 * the serial numbers stored here are compared against the lookup tables' serials to detect
 * that an entry was evicted and re-used since this node was cached.
 */
static final class DependentNode {
// The actual cached node; null until this node has been encoded at least once.
public UniversalTerm encoded;
// 1: datatypes and IRI names
// The pointer is the index in the lookup table, the serial is the serial number of the entry.
// The serial in the lookup table must be equal to the serial here for the entry to be valid.
public int lookupPointer1;
public int lookupSerial1;
// 2: IRI prefixes
// Same pointer/serial validity rule as above, but against the prefix lookup table.
public int lookupPointer2;
public int lookupSerial2;
}

/**
* A simple LRU cache for already encoded nodes.
* @param <K> Key type
* @param <V> Value type
*/
/**
 * A simple LRU cache for already encoded nodes.
 * <p>
 * Not thread-safe (same as the enclosing encoder).
 * @param <K> Key type
 * @param <V> Value type
 */
private static final class NodeCache<K, V> extends LinkedHashMap<K, V> {
    private final int maxSize;

    public NodeCache(int maxSize) {
        // accessOrder = true makes iteration (and thus removeEldestEntry's notion of
        // "eldest") follow least-recently-accessed order, giving true LRU eviction.
        // The default LinkedHashMap constructor keeps insertion order, which would
        // make this a FIFO cache despite the class javadoc.
        // Capacity is pre-sized so the map never rehashes while below maxSize entries.
        super(maxSize * 4 / 3 + 1, 0.75f, true);
        this.maxSize = maxSize;
    }

    @Override
    protected boolean removeEldestEntry(java.util.Map.Entry<K, V> eldest) {
        // Evict the least-recently-used entry once the cache exceeds its budget.
        return size() > maxSize;
    }
}

private final int maxPrefixTableSize;
private int lastIriNameId;
private int lastIriPrefixId = -1000;
Expand All @@ -29,9 +63,9 @@ public final class NodeEncoder<TNode> {

// We split the node caches in three – the first two are for nodes that depend on the lookups
// (IRIs and datatype literals). The third one is for nodes that don't depend on the lookups.
private final EncoderNodeCacheDependent iriNodeCache;
private final EncoderNodeCacheDependent dtLiteralNodeCache;
private final EncoderNodeCacheSimple nodeCache;
private final NodeCache<Object, DependentNode> iriNodeCache;
private final NodeCache<Object, DependentNode> dtLiteralNodeCache;
private final NodeCache<Object, UniversalTerm> nodeCache;

// Pre-allocated IRI that has prefixId=0 and nameId=0
static final RdfIri zeroIri = new RdfIri(0, 0);
Expand All @@ -50,7 +84,7 @@ public NodeEncoder(RdfStreamOptions opt, int nodeCacheSize, int iriNodeCacheSize
this.maxPrefixTableSize = opt.maxPrefixTableSize();
if (maxPrefixTableSize > 0) {
prefixLookup = new EncoderLookup(maxPrefixTableSize, true);
iriNodeCache = new EncoderNodeCacheDependent(iriNodeCacheSize);
iriNodeCache = new NodeCache<>(iriNodeCacheSize);
} else {
prefixLookup = null;
iriNodeCache = null;
Expand All @@ -59,9 +93,9 @@ public NodeEncoder(RdfStreamOptions opt, int nodeCacheSize, int iriNodeCacheSize
for (int i = 0; i < nameOnlyIris.length; i++) {
nameOnlyIris[i] = new RdfIri(0, i);
}
dtLiteralNodeCache = new EncoderNodeCacheDependent(dtLiteralNodeCacheSize);
dtLiteralNodeCache = new NodeCache<>(dtLiteralNodeCacheSize);
nameLookup = new EncoderLookup(opt.maxNameTableSize(), maxPrefixTableSize > 0);
nodeCache = new EncoderNodeCacheSimple(nodeCacheSize);
nodeCache = new NodeCache<>(nodeCacheSize);
}

/**
Expand All @@ -75,7 +109,7 @@ public NodeEncoder(RdfStreamOptions opt, int nodeCacheSize, int iriNodeCacheSize
public UniversalTerm encodeDtLiteral(
TNode key, String lex, String datatypeName, ArrayBuffer<RdfStreamRow> rowsBuffer
) {
var cachedNode = dtLiteralNodeCache.getOrClearIfAbsent(key);
var cachedNode = dtLiteralNodeCache.computeIfAbsent(key, k -> new DependentNode());
// Check if the value is still valid
if (cachedNode.encoded != null &&
cachedNode.lookupSerial1 == datatypeLookup.serials[cachedNode.lookupPointer1]
Expand Down Expand Up @@ -127,7 +161,7 @@ public UniversalTerm encodeIri(String iri, ArrayBuffer<RdfStreamRow> rowsBuffer)
}

// Slow path, with splitting out the prefix
var cachedNode = iriNodeCache.getOrClearIfAbsent(iri);
var cachedNode = iriNodeCache.computeIfAbsent(iri, k -> new DependentNode());
// Check if the value is still valid
if (cachedNode.encoded != null &&
cachedNode.lookupSerial1 == nameLookup.serials[cachedNode.lookupPointer1] &&
Expand Down Expand Up @@ -212,6 +246,6 @@ private UniversalTerm outputIri(DependentNode cachedNode) {
* @return The encoded node
*/
public UniversalTerm encodeOther(Object key, Function<Object, UniversalTerm> encoder) {
return nodeCache.getOrComputeIfAbsent(key, encoder);
return nodeCache.computeIfAbsent(key, encoder);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -144,11 +144,10 @@ abstract class ProtoEncoder[TNode, -TTriple, -TQuad, -TQuoted](val options: RdfS
private val extraRowsBuffer = new ArrayBuffer[RdfStreamRow](32)
private val nodeEncoder = new NodeEncoder[TNode](
options,
// Make the node cache size between 256 and 2048, depending on the user's maxNameTableSize.
Math.max(Math.min(options.maxNameTableSize, 2048), 512),
// IRI cache can be the largest...
Math.max(Math.min((options.maxNameTableSize * 1.5).toInt, 8192), 512),
Math.max(Math.min(options.maxNameTableSize, 2048), 512),
// Make the node cache size between 256 and 1024, depending on the user's maxNameTableSize.
Math.max(Math.min(options.maxNameTableSize, 1024), 256),
options.maxNameTableSize,
Math.max(Math.min(options.maxNameTableSize, 1024), 256),
)
private var emittedOptions = false

Expand Down

0 comments on commit 2d428d8

Please sign in to comment.