Skip to content

Commit

Permalink
Use tableswitches in term encoding (Jelly-RDF#199)
Browse files Browse the repository at this point in the history
Same optimization as in Jelly-RDF#198 – instead of going through if-else-if chains with multiple virtual method calls to is* methods, do one virtual method call to `termNumber` and then use the tableswitch instruction.
  • Loading branch information
Ostrzyciel authored Oct 24, 2024
1 parent f25c778 commit 39e1f31
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 97 deletions.
55 changes: 29 additions & 26 deletions core/src/main/scala/eu/ostrzyciel/jelly/core/ProtoDecoderImpl.scala
Original file line number Diff line number Diff line change
Expand Up @@ -50,37 +50,40 @@ sealed abstract class ProtoDecoderImpl[TNode, TDatatype : ClassTag, +TTriple, +T
private final def convertTerm(term: SpoTerm): TNode =
if term == null then
throw new RdfProtoDeserializationError("Term value is not set inside a quoted triple.")
else if term.isIri then
converter.makeIriNode(nameDecoder.decode(term.iri))
else if term.isBnode then
converter.makeBlankNode(term.bnode)
else if term.isLiteral then
convertLiteral(term.literal)
else if term.isTripleTerm then
val inner = term.tripleTerm
// ! No support for repeated terms in quoted triples
converter.makeTripleNode(
convertTerm(inner.subject),
convertTerm(inner.predicate),
convertTerm(inner.`object`),
)
else
throw new RdfProtoDeserializationError("Unknown term type.")
else (term.termNumber : @switch) match
case RdfTerm.TERM_IRI =>
converter.makeIriNode(nameDecoder.decode(term.iri))
case RdfTerm.TERM_BNODE =>
converter.makeBlankNode(term.bnode)
case RdfTerm.TERM_LITERAL =>
convertLiteral(term.literal)
case RdfTerm.TERM_TRIPLE =>
val inner = term.tripleTerm
// ! No support for repeated terms in quoted triples
converter.makeTripleNode(
convertTerm(inner.subject),
convertTerm(inner.predicate),
convertTerm(inner.`object`),
)
case _ =>
throw new RdfProtoDeserializationError("Unknown term type.")


protected final def convertGraphTerm(graph: GraphTerm): TNode =
if graph == null then
throw new RdfProtoDeserializationError("Empty graph term encountered in a GRAPHS stream.")
else if graph.isIri then
converter.makeIriNode(nameDecoder.decode(graph.iri))
else if graph.isDefaultGraph then
converter.makeDefaultGraphNode()
else if graph.isBnode then
converter.makeBlankNode(graph.bnode)
else if graph.isLiteral then
convertLiteral(graph.literal)
else
throw new RdfProtoDeserializationError("Unknown graph term type.")
else (graph.termNumber : @switch) match
case RdfTerm.TERM_IRI =>
converter.makeIriNode(nameDecoder.decode(graph.iri))
case RdfTerm.TERM_BNODE =>
converter.makeBlankNode(graph.bnode)
case RdfTerm.TERM_LITERAL =>
convertLiteral(graph.literal)
case RdfTerm.TERM_DEFAULT_GRAPH =>
converter.makeDefaultGraphNode()
case _ =>
throw new RdfProtoDeserializationError("Unknown graph term type.")


protected final def convertTermWrapped(term: SpoTerm, lastNodeHolder: LastNodeHolder[TNode]): TNode =
if term == null then
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
package eu.ostrzyciel.jelly.core.proto.v1

private[core] trait RdfStreamRowValue:

/**
* Returns the internal stream row value number, which is used in switch statements to determine the type of the row.
* This is NOT guaranteed to be the same as the field number in the protobuf encoding!
* (although this is the case in the current implementation)
* The values returned by this method may change in future versions of Jelly-JVM without warning.
* @return the stream row value number
*/
def streamRowValueNumber: Int

def isOptions: Boolean = false
Expand Down
140 changes: 82 additions & 58 deletions core/src/main/scala/eu/ostrzyciel/jelly/core/proto/v1/RdfTerm.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,23 @@ package eu.ostrzyciel.jelly.core.proto.v1
import com.google.protobuf.CodedOutputStream
import eu.ostrzyciel.jelly.core.proto.v1.*

import scala.annotation.switch

/**
* Trait enabling access into the fields of RDF terms (subjects, predicates, objects, graphs) in the
* protobuf encoding.
*
* See also [[eu.ostrzyciel.jelly.core.proto_adapters.RdfTermCompanion]].
*/
sealed trait RdfTerm:
/**
* Returns the internal term number, which is used in switch statements to determine the type of the term.
* This is NOT the field number in the protobuf encoding!
* The values returned by this method may change in future versions of Jelly-JVM without warning.
* @return the term number
*/
def termNumber: Int

def isIri: Boolean = false
def isBnode: Boolean = false
def isLiteral: Boolean = false
Expand All @@ -34,11 +44,19 @@ trait GraphTerm extends RdfTerm:
trait UniversalTerm extends SpoTerm, GraphTerm

object RdfTerm:
// Inlined constants for term numbers (.termNumber)
// These are internal dispatch values matched by the `@switch` expressions, not protobuf field
// numbers: e.g. a default graph is written with field tag 3 + tagOffset, while
// TERM_DEFAULT_GRAPH is 5.
private[core] inline val TERM_IRI = 1
private[core] inline val TERM_BNODE = 2
private[core] inline val TERM_LITERAL = 3
private[core] inline val TERM_TRIPLE = 4
private[core] inline val TERM_DEFAULT_GRAPH = 5

/**
 * Wrapper class for blank nodes, because in the proto they are simply represented as strings, and
 * we cannot inherit from String. We must use a wrapper.
 *
 * @param bnode the string representation of the blank node, as stored in the proto
 */
final case class Bnode(override val bnode: String) extends UniversalTerm:
  // Refer to the shared constant instead of a bare literal so this value cannot drift from the
  // `case RdfTerm.TERM_BNODE` branches of the @switch matches.
  override def termNumber: Int = RdfTerm.TERM_BNODE
  override def isBnode: Boolean = true

// Methods below are used in RdfTriple, RdfQuad, and RdfGraphStart instead of generated code. They are all
Expand All @@ -48,70 +66,76 @@ private[v1] inline def fieldTagSize(inline tag: Int) = if tag < 16 then 1 else 2

private[v1] inline def graphTermSerializedSize(g: GraphTerm, inline tagOffset: Int): Int =
if g == null then 0
else if g.isIri then
val iriS = g.iri.serializedSize
fieldTagSize(1 + tagOffset) + CodedOutputStream.computeUInt32SizeNoTag(iriS) + iriS
else if g.isDefaultGraph then
val dgS = g.defaultGraph.serializedSize
fieldTagSize(3 + tagOffset) + CodedOutputStream.computeUInt32SizeNoTag(dgS) + dgS
else if g.isBnode then
CodedOutputStream.computeStringSize(2 + tagOffset, g.bnode)
else if g.isLiteral then
val litS = g.literal.serializedSize
fieldTagSize(4 + tagOffset) + CodedOutputStream.computeUInt32SizeNoTag(litS) + litS
else 0
else (g.termNumber : @switch) match
case RdfTerm.TERM_IRI =>
val iriS = g.iri.serializedSize
fieldTagSize(1 + tagOffset) + CodedOutputStream.computeUInt32SizeNoTag(iriS) + iriS
case RdfTerm.TERM_BNODE =>
CodedOutputStream.computeStringSize(2 + tagOffset, g.bnode)
case RdfTerm.TERM_LITERAL =>
val litS = g.literal.serializedSize
fieldTagSize(4 + tagOffset) + CodedOutputStream.computeUInt32SizeNoTag(litS) + litS
case RdfTerm.TERM_DEFAULT_GRAPH =>
val dgS = g.defaultGraph.serializedSize
fieldTagSize(3 + tagOffset) + CodedOutputStream.computeUInt32SizeNoTag(dgS) + dgS
case _ => 0

private[v1] inline def graphTermWriteTo(g: GraphTerm, inline tagOffset: Int, out: CodedOutputStream): Unit =
if g == null then ()
else if g.isIri then
val iri = g.iri
out.writeTag(1 + tagOffset, 2)
out.writeUInt32NoTag(iri.serializedSize)
iri.writeTo(out)
else if g.isDefaultGraph then
val defaultGraph = g.defaultGraph
out.writeTag(3 + tagOffset, 2)
out.writeUInt32NoTag(defaultGraph.serializedSize)
defaultGraph.writeTo(out)
else if g.isBnode then
out.writeString(2 + tagOffset, g.bnode)
else if g.isLiteral then
val literal = g.literal
out.writeTag(4 + tagOffset, 2)
out.writeUInt32NoTag(literal.serializedSize)
literal.writeTo(out)
else (g.termNumber : @switch) match
case RdfTerm.TERM_IRI =>
val iri = g.iri
out.writeTag(1 + tagOffset, 2)
out.writeUInt32NoTag(iri.serializedSize)
iri.writeTo(out)
case RdfTerm.TERM_BNODE =>
out.writeString(2 + tagOffset, g.bnode)
case RdfTerm.TERM_LITERAL =>
val literal = g.literal
out.writeTag(4 + tagOffset, 2)
out.writeUInt32NoTag(literal.serializedSize)
literal.writeTo(out)
case RdfTerm.TERM_DEFAULT_GRAPH =>
val defaultGraph = g.defaultGraph
out.writeTag(3 + tagOffset, 2)
out.writeUInt32NoTag(defaultGraph.serializedSize)
defaultGraph.writeTo(out)
case _ => ()

private[v1] inline def spoTermSerializedSize(t: SpoTerm, inline tagOffset: Int) =
if t == null then 0
else if t.isIri then
val iriS = t.iri.serializedSize
fieldTagSize(1 + tagOffset) + CodedOutputStream.computeUInt32SizeNoTag(iriS) + iriS
else if t.isBnode then
CodedOutputStream.computeStringSize(2 + tagOffset, t.bnode)
else if t.isLiteral then
val literalS = t.literal.serializedSize
fieldTagSize(3 + tagOffset) + CodedOutputStream.computeUInt32SizeNoTag(literalS) + literalS
else if t.isTripleTerm then
val tripleS = t.tripleTerm.serializedSize
fieldTagSize(4 + tagOffset) + CodedOutputStream.computeUInt32SizeNoTag(tripleS) + tripleS
else 0
else (t.termNumber : @switch) match
case RdfTerm.TERM_IRI =>
val iriS = t.iri.serializedSize
fieldTagSize(1 + tagOffset) + CodedOutputStream.computeUInt32SizeNoTag(iriS) + iriS
case RdfTerm.TERM_BNODE =>
CodedOutputStream.computeStringSize(2 + tagOffset, t.bnode)
case RdfTerm.TERM_LITERAL =>
val literalS = t.literal.serializedSize
fieldTagSize(3 + tagOffset) + CodedOutputStream.computeUInt32SizeNoTag(literalS) + literalS
case RdfTerm.TERM_TRIPLE =>
val tripleS = t.tripleTerm.serializedSize
fieldTagSize(4 + tagOffset) + CodedOutputStream.computeUInt32SizeNoTag(tripleS) + tripleS
case _ => 0

private[v1] inline def spoTermWriteTo(t: SpoTerm, inline tagOffset: Int, out: CodedOutputStream): Unit =
if t == null then ()
else if t.isIri then
val iri = t.iri
out.writeTag(1 + tagOffset, 2)
out.writeUInt32NoTag(iri.serializedSize)
iri.writeTo(out)
else if t.isBnode then
out.writeString(2 + tagOffset, t.bnode)
else if t.isLiteral then
val literal = t.literal
out.writeTag(3 + tagOffset, 2)
out.writeUInt32NoTag(literal.serializedSize)
literal.writeTo(out)
else if t.isTripleTerm then
val triple = t.tripleTerm
out.writeTag(4 + tagOffset, 2)
out.writeUInt32NoTag(triple.serializedSize)
triple.writeTo(out)
else (t.termNumber : @switch) match
case RdfTerm.TERM_IRI =>
val iri = t.iri
out.writeTag(1 + tagOffset, 2)
out.writeUInt32NoTag(iri.serializedSize)
iri.writeTo(out)
case RdfTerm.TERM_BNODE =>
out.writeString(2 + tagOffset, t.bnode)
case RdfTerm.TERM_LITERAL =>
val literal = t.literal
out.writeTag(3 + tagOffset, 2)
out.writeUInt32NoTag(literal.serializedSize)
literal.writeTo(out)
case RdfTerm.TERM_TRIPLE =>
val triple = t.tripleTerm
out.writeTag(4 + tagOffset, 2)
out.writeUInt32NoTag(triple.serializedSize)
triple.writeTo(out)
case _ => ()
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ final case class RdfTriple(subject: SpoTerm = null, predicate: SpoTerm = null, `
override def isTriple: Boolean = true

override def triple: RdfTriple = this

// Must agree with the `case RdfTerm.TERM_TRIPLE` branches of the @switch matches, so use the
// shared constant rather than a bare literal.
override def termNumber: Int = RdfTerm.TERM_TRIPLE
}

object RdfTriple extends scalapb.GeneratedMessageCompanion[eu.ostrzyciel.jelly.core.proto.v1.RdfTriple] {
Expand Down
40 changes: 28 additions & 12 deletions project/Transform3.scala
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ import scala.meta.*
*/
object Transform3 {
val transformer: Transformer = new Transformer {
def copyTemplate(templ: Template, traits: Seq[String], name: String, isName: String, number: Option[Int] = None):
Template = {
def copyTemplate(templ: Template, traits: Seq[String], name: String, isName: String,
number: Option[(String, Int)] = None): Template = {
templ.copy(
inits = templ.inits ++ traits.map { tName =>
Init.After_4_6_0(Type.Name(tName), Name.Anonymous(), Nil)
Expand All @@ -27,10 +27,10 @@ object Transform3 {
None,
Term.This(Name.Anonymous()),
),
) ++ number.map { n =>
) ++ number.map { case (name, n) =>
Defn.Def.After_4_7_3(
List(Mod.Override()),
Term.Name("streamRowValueNumber"),
Term.Name(name),
Nil,
None,
Lit.Int(n),
Expand All @@ -43,16 +43,32 @@ object Transform3 {
case Defn.Class.After_4_6_0(_, Type.Name(name), _, _, templ) =>
val newTempl = name match {
// RdfTerm
case "RdfIri" => Some(copyTemplate(templ, Seq("UniversalTerm"), "iri", "isIri"))
case "RdfLiteral" => Some(copyTemplate(templ, Seq("UniversalTerm"), "literal", "isLiteral"))
case "RdfDefaultGraph" => Some(copyTemplate(templ, Seq("GraphTerm"), "defaultGraph", "isDefaultGraph"))
case "RdfIri" => Some(copyTemplate(
templ, Seq("UniversalTerm"), "iri", "isIri", Some(("termNumber", 1))
))
case "RdfLiteral" => Some(copyTemplate(
templ, Seq("UniversalTerm"), "literal", "isLiteral", Some(("termNumber", 3))
))
case "RdfDefaultGraph" => Some(copyTemplate(
templ, Seq("GraphTerm"), "defaultGraph", "isDefaultGraph", Some(("termNumber", 5))
))

// RdfStreamRowValue
case "RdfStreamOptions" => Some(copyTemplate(templ, Seq("RdfStreamRowValue"), "options", "isOptions", Some(1)))
case "RdfGraphEnd" => Some(copyTemplate(templ, Seq("RdfStreamRowValue"), "graphEnd", "isGraphEnd", Some(5)))
case "RdfNameEntry" => Some(copyTemplate(templ, Seq("RdfStreamRowValue"), "name", "isName", Some(9)))
case "RdfPrefixEntry" => Some(copyTemplate(templ, Seq("RdfStreamRowValue"), "prefix", "isPrefix", Some(10)))
case "RdfDatatypeEntry" => Some(copyTemplate(templ, Seq("RdfStreamRowValue"), "datatype", "isDatatype", Some(11)))
case "RdfStreamOptions" => Some(copyTemplate(
templ, Seq("RdfStreamRowValue"), "options", "isOptions", Some(("streamRowValueNumber", 1))
))
case "RdfGraphEnd" => Some(copyTemplate(
templ, Seq("RdfStreamRowValue"), "graphEnd", "isGraphEnd", Some(("streamRowValueNumber", 5))
))
case "RdfNameEntry" => Some(copyTemplate(
templ, Seq("RdfStreamRowValue"), "name", "isName", Some(("streamRowValueNumber", 9))
))
case "RdfPrefixEntry" => Some(copyTemplate(
templ, Seq("RdfStreamRowValue"), "prefix", "isPrefix", Some(("streamRowValueNumber", 10))
))
case "RdfDatatypeEntry" => Some(copyTemplate(
templ, Seq("RdfStreamRowValue"), "datatype", "isDatatype", Some(("streamRowValueNumber", 11))
))
case _ => None
}
newTempl.map(templ => tree.asInstanceOf[Defn.Class].copy(templ = templ)).getOrElse(tree)
Expand Down

0 comments on commit 39e1f31

Please sign in to comment.