Faster dimension deserialization on the brokers (apache#16740)
Speedier dimension deserialization on the brokers.
LakshSingla authored and sreemanamala committed Aug 6, 2024
1 parent fd25b8c commit 5f3d5f5
Showing 9 changed files with 530 additions and 109 deletions.
@@ -0,0 +1,193 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.druid.benchmark;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.guice.NestedDataModule;
import org.apache.druid.jackson.AggregatorsModule;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.QueryRunnerTestHelper;
import org.apache.druid.query.aggregation.SerializablePairLongString;
import org.apache.druid.query.aggregation.post.ConstantPostAggregator;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.query.groupby.GroupByQuery;
import org.apache.druid.query.groupby.GroupByQueryConfig;
import org.apache.druid.query.groupby.GroupByQueryQueryToolChest;
import org.apache.druid.query.groupby.ResultRow;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.column.ColumnType;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.TimeUnit;

@State(Scope.Benchmark)
@Fork(value = 1)
@Warmup(iterations = 3)
@Measurement(iterations = 5)
public class GroupByDeserializationBenchmark
{

static {
NullHandling.initializeForTests();
NestedDataModule.registerHandlersAndSerde();
AggregatorsModule.registerComplexMetricsAndSerde();
}

@Param({"100", "1000"})
private int numDimensions;

@Param({"0", "0.25", "0.5", "0.75", "0.85", "0.95", "0.99", "1.0"})
private double primitiveToComplexDimensionRatio;

@Param({"json", "serializablePairLongString"})
private String complexDimensionType;

@Param({"true", "false"})
private boolean backwardCompatibility;

private GroupByQuery sqlQuery;
private String serializedRow;
private GroupByQueryQueryToolChest groupByQueryQueryToolChest;
private ObjectMapper decoratedMapper;

@Setup(Level.Trial)
public void setup() throws JsonProcessingException
{
final ObjectMapper undecoratedMapper = TestHelper.makeJsonMapper();
undecoratedMapper.registerModules(NestedDataModule.getJacksonModulesList());
undecoratedMapper.registerModule(new AggregatorsModule());
final Pair<GroupByQuery, String> sqlQueryAndResultRow = sqlQueryAndResultRow(
numDimensions,
primitiveToComplexDimensionRatio,
complexDimensionType,
undecoratedMapper
);
sqlQuery = sqlQueryAndResultRow.lhs;
serializedRow = sqlQueryAndResultRow.rhs;

groupByQueryQueryToolChest = new GroupByQueryQueryToolChest(
null,
() -> new GroupByQueryConfig()
{
@Override
public boolean isIntermediateResultAsMapCompat()
{
return backwardCompatibility;
}
},
null,
null
);

decoratedMapper = groupByQueryQueryToolChest.decorateObjectMapper(undecoratedMapper, sqlQuery);
}

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void deserializeResultRows(Blackhole blackhole) throws JsonProcessingException
{
blackhole.consume(decoratedMapper.readValue(serializedRow, ResultRow.class));
}

private static Pair<GroupByQuery, String> sqlQueryAndResultRow(
final int numDimensions,
final double primitiveToComplexDimensionRatio,
final String complexDimensionType,
final ObjectMapper mapper
) throws JsonProcessingException
{
final int numPrimitiveDimensions = (int) Math.floor(primitiveToComplexDimensionRatio * numDimensions);
final int numComplexDimensions = numDimensions - numPrimitiveDimensions;

final List<DimensionSpec> dimensions = new ArrayList<>();
final List<Object> rowList = new ArrayList<>();

// Add timestamp
rowList.add(DateTimes.of("2000").getMillis());

for (int i = 0; i < numPrimitiveDimensions; ++i) {
dimensions.add(
new DefaultDimensionSpec(
StringUtils.format("primitive%d", i),
StringUtils.format("primitive%d", i),
ColumnType.STRING
)
);
rowList.add("foo");
}

for (int i = 0; i < numComplexDimensions; ++i) {
dimensions.add(
new DefaultDimensionSpec(
StringUtils.format("complex%d", i),
StringUtils.format("complex%d", i),
ColumnType.ofComplex(complexDimensionType)
)
);

// The serialized form of this object is a valid value for both JSON and long-string pair dimensions
rowList.add(new SerializablePairLongString(1L, "test"));
}

// Add aggregator
rowList.add(100);

// Add post aggregator
rowList.add(10.0);

GroupByQuery query = GroupByQuery.builder()
.setDataSource("foo")
.setQuerySegmentSpec(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
.setDimensions(dimensions)
.setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT)
.setPostAggregatorSpecs(Collections.singletonList(new ConstantPostAggregator(
"post",
10
)))
.setContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_ARRAY_RESULT_ROWS, true))
.setGranularity(Granularities.DAY)
.build();

return Pair.of(query, mapper.writeValueAsString(rowList));
}
}
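
Since the class above is a standard JMH benchmark, it can also be launched programmatically with the JMH runner. The following is a minimal sketch, not part of this commit; the runner class name and the narrowed parameter values are assumptions for illustration.

import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

public class GroupByDeserializationBenchmarkRunner
{
  public static void main(String[] args) throws RunnerException
  {
    // Narrow the parameter space for a quick local run; omit the .param(...) calls
    // to sweep the full @Param matrix declared on the benchmark.
    Options opt = new OptionsBuilder()
        .include(GroupByDeserializationBenchmark.class.getSimpleName())
        .param("numDimensions", "1000")
        .param("backwardCompatibility", "true", "false")
        .build();
    new Runner(opt).run();
  }
}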
@@ -20,8 +20,11 @@
package org.apache.druid.java.util.common.jackson;

import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.JavaType;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.JsonSerializer;
@@ -122,6 +125,40 @@ public static void writeObjectUsingSerializerProvider(
}
}

/**
* Reads an object using the given {@link JsonParser}. It reuses the provided {@link DeserializationContext}, which
* offers better performance than calling {@link JsonParser#readValueAs(Class)} because it avoids re-creating the
* {@link DeserializationContext} for each readValue call.
*/
@Nullable
public static <T> T readObjectUsingDeserializationContext(
final JsonParser jp,
final DeserializationContext deserializationContext,
final Class<T> clazz
) throws IOException
{
if (jp.currentToken() == JsonToken.VALUE_NULL) {
return null;
}
return deserializationContext.readValue(jp, clazz);
}

/**
* @see #readObjectUsingDeserializationContext(JsonParser, DeserializationContext, Class)
*/
@Nullable
public static Object readObjectUsingDeserializationContext(
final JsonParser jp,
final DeserializationContext deserializationContext,
final JavaType javaType
) throws IOException
{
if (jp.currentToken() == JsonToken.VALUE_NULL) {
return null;
}
return deserializationContext.readValue(jp, javaType);
}

/**
* Convert the given object to an array of bytes. Use when the object is
* known serializable so that the Jackson exception can be suppressed.
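
To illustrate how these helpers are meant to be used, here is a minimal sketch (not part of this commit; the deserializer and its value type are hypothetical) of a custom JsonDeserializer that reuses the DeserializationContext Jackson hands to deserialize(), instead of calling JsonParser#readValueAs, which rebuilds a context per value:

import java.io.IOException;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.JavaType;
import com.fasterxml.jackson.databind.JsonDeserializer;
import org.apache.druid.java.util.common.jackson.JacksonUtils;

class ComplexValueDeserializer extends JsonDeserializer<Object>
{
  private final JavaType valueType;

  ComplexValueDeserializer(JavaType valueType)
  {
    this.valueType = valueType;
  }

  @Override
  public Object deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException
  {
    // Reuses the context handed in by Jackson; avoids re-creating a DeserializationContext per value.
    return JacksonUtils.readObjectUsingDeserializationContext(jp, ctxt, valueType);
  }
}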
@@ -88,6 +88,11 @@ public final JavaType getBySegmentResultType()
* For most queries, this is a no-op, but it can be useful for query types that support more than one result
* serialization format. Queries that implement this method must not modify the provided ObjectMapper, but instead
* must return a copy.
* <p>
* Jackson's default deserialization is usually well optimised, and this method should be overridden only when
* there is a functional requirement to do so. Any override must be benchmarked in isolation, without other
* portions of the query engine executing, since changes to this method can alter the performance of queries
* where deserialization is a major portion of the execution.
*/
public ObjectMapper decorateObjectMapper(final ObjectMapper objectMapper, final QueryType query)
{
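
As an illustration of the contract described above, an implementation should decorate a copy rather than mutate the mapper it receives. This is a sketch with hypothetical query and row types, not code from this commit:

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.module.SimpleModule;

@Override
public ObjectMapper decorateObjectMapper(ObjectMapper objectMapper, MyQuery query)
{
  // Never modify the provided mapper; register the custom result-row handling on a copy.
  ObjectMapper copy = objectMapper.copy();
  SimpleModule module = new SimpleModule("MyQueryResultFormat");
  module.addDeserializer(MyResultRow.class, new MyResultRowDeserializer(query)); // hypothetical types
  copy.registerModule(module);
  return copy;
}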
@@ -293,7 +293,6 @@ public boolean isVectorize()
return vectorize;
}

@SuppressWarnings("unused")
public boolean isIntermediateResultAsMapCompat()
{
return intermediateResultAsMapCompat;
