From 54aa36fa08e16b2b1f1932779597cdb6b9fc3235 Mon Sep 17 00:00:00 2001 From: Ostrzyciel Date: Wed, 10 Jul 2024 10:44:26 +0200 Subject: [PATCH] Check minimum table sizes in stream options validator Name and datatype tables can be easily set to 0 size by accident, which will cause the encoder to blow up when it tries to encode anything. This commit introduces a hardcoded check to make sure the tables have a minimum reasonable size. --- .../ostrzyciel/jelly/core/JellyOptions.scala | 27 +++++++++++-------- .../jelly/core/ProtoDecoderSpec.scala | 26 ++++++++++++++++++ 2 files changed, 42 insertions(+), 11 deletions(-) diff --git a/core/src/main/scala/eu/ostrzyciel/jelly/core/JellyOptions.scala b/core/src/main/scala/eu/ostrzyciel/jelly/core/JellyOptions.scala index 7818e274..341ddaaf 100644 --- a/core/src/main/scala/eu/ostrzyciel/jelly/core/JellyOptions.scala +++ b/core/src/main/scala/eu/ostrzyciel/jelly/core/JellyOptions.scala @@ -104,9 +104,9 @@ object JellyOptions: * - version (must be <= Constants.protoVersion and <= supportedOptions.version) * - generalized statements (must be <= supportedOptions.generalizedStatements) * - RDF star (must be <= supportedOptions.rdfStar) - * - max name table size (must be <= supportedOptions.maxNameTableSize). + * - max name table size (must be <= supportedOptions.maxNameTableSize and >= 16). * - max prefix table size (must be <= supportedOptions.maxPrefixTableSize) - * - max datatype table size (must be <= supportedOptions.maxDatatypeTableSize) + * - max datatype table size (must be <= supportedOptions.maxDatatypeTableSize and >= 8) * - logical stream type (must be compatible with physical stream type and compatible with expected log. stream type) * * We don't check: @@ -128,23 +128,28 @@ object JellyOptions: s"This library version supports up to version ${Constants.protoVersion}.") if requestedOptions.generalizedStatements && !supportedOptions.generalizedStatements then - throw new RdfProtoDeserializationError(s"The stream uses generalized statements, which the user marked as not " + - s"supported. To read this stream, set generalizedStatements to true in the supportedOptions for this decoder.") + throw new RdfProtoDeserializationError(s"The stream uses generalized statements, which are not supported. " + + s"Either disable generalized statements or enable them in the supportedOptions.") if requestedOptions.rdfStar && !supportedOptions.rdfStar then - throw new RdfProtoDeserializationError(s"The stream uses RDF-star, which the user marked as not supported. " + - s"To read this stream, set rdfStar to true in the supportedOptions for this decoder.") + throw new RdfProtoDeserializationError(s"The stream uses RDF-star, which is not supported. Either disable" + + s" RDF-star or enable it in the supportedOptions.") - def checkTableSize(name: String, size: Int, supportedSize: Int): Unit = + def checkTableSize(name: String, size: Int, supportedSize: Int, minSize: Int = 0): Unit = if size > supportedSize then throw new RdfProtoDeserializationError(s"The stream uses a ${name.toLowerCase} table size of $size, which is " + - s"larger than the maximum supported size of $supportedSize. To read this stream, set max${name}TableSize " + - s"to at least $size in the supportedOptions for this decoder." + s"larger than the maximum supported size of $supportedSize." + ) + if size < minSize then + throw new RdfProtoDeserializationError(s"The stream uses a ${name.toLowerCase} table size of $size, which is " + + s"smaller than the minimum supported size of $minSize." ) - checkTableSize("Name", requestedOptions.maxNameTableSize, supportedOptions.maxNameTableSize) + // The minimum sizes are hard-coded because it would be impossible to reliably encode the stream + // with smaller tables, especially if RDF-star is used. + checkTableSize("Name", requestedOptions.maxNameTableSize, supportedOptions.maxNameTableSize, 16) checkTableSize("Prefix", requestedOptions.maxPrefixTableSize, supportedOptions.maxPrefixTableSize) - checkTableSize("Datatype", requestedOptions.maxDatatypeTableSize, supportedOptions.maxDatatypeTableSize) + checkTableSize("Datatype", requestedOptions.maxDatatypeTableSize, supportedOptions.maxDatatypeTableSize, 8) checkLogicalStreamType(requestedOptions, supportedOptions.logicalType) diff --git a/core/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala b/core/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala index df40aa38..0f60e4b5 100644 --- a/core/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala +++ b/core/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala @@ -596,4 +596,30 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: error.getMessage should include("datatype table size of 100") error.getMessage should include("larger than the maximum supported size of 80") } + + "throw exception on a stream with a name table size smaller than supported" in { + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized + .withPhysicalType(streamType) + .withMaxNameTableSize(2) // 16 is the minimum + )) + val error = intercept[RdfProtoDeserializationError] { + decoderFactory(None).ingestRow(data.head) + } + error.getMessage should include("name table size of 2") + error.getMessage should include("smaller than the minimum supported size of 16") + } + + "throw exception on a stream with a datatype table size smaller than supported" in { + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized + .withPhysicalType(streamType) + .withMaxDatatypeTableSize(2) // 8 is the minimum + )) + val error = intercept[RdfProtoDeserializationError] { + decoderFactory(None).ingestRow(data.head) + } + error.getMessage should include("datatype table size of 2") + error.getMessage should include("smaller than the minimum supported size of 8") + } }