From 4588d0b15db53553f25615d9ec2d552173143b8a Mon Sep 17 00:00:00 2001 From: Gonzalo Ortiz Jaureguizar Date: Fri, 8 Nov 2024 15:39:18 +0100 Subject: [PATCH] Modify AggregationPlanNode to consider not nullable columns that do not contain nulls (#14342) --- .../pinot/core/plan/AggregationPlanNode.java | 40 +++++++++++++++++-- 1 file changed, 37 insertions(+), 3 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationPlanNode.java b/pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationPlanNode.java index 2a1321f1b97..6202f0890d0 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationPlanNode.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/plan/AggregationPlanNode.java @@ -36,6 +36,7 @@ import org.apache.pinot.segment.spi.IndexSegment; import org.apache.pinot.segment.spi.SegmentContext; import org.apache.pinot.segment.spi.datasource.DataSource; +import org.apache.pinot.segment.spi.index.reader.NullValueVectorReader; import static org.apache.pinot.segment.spi.AggregationFunctionType.*; @@ -94,7 +95,8 @@ public Operator buildNonFilteredAggOperator() { FilterPlanNode filterPlanNode = new FilterPlanNode(_segmentContext, _queryContext); BaseFilterOperator filterOperator = filterPlanNode.run(); - if (!_queryContext.isNullHandlingEnabled()) { + boolean hasNullValues = _queryContext.isNullHandlingEnabled() && hasNullValues(aggregationFunctions); + if (!hasNullValues) { if (canOptimizeFilteredCount(filterOperator, aggregationFunctions)) { return new FastFilteredCountOperator(_queryContext, filterOperator, _indexSegment.getSegmentMetadata()); } @@ -118,17 +120,49 @@ public Operator buildNonFilteredAggOperator() { return new AggregationOperator(_queryContext, aggregationInfo, numTotalDocs); } + /** + * Returns {@code true} if any of the aggregation functions have null values, {@code false} otherwise. + * + * The current implementation is pessimistic and returns {@code true} if any of the arguments to the aggregation + * functions is of function type. This is because we do not have a way to determine if the function will return null + * values without actually evaluating it. + */ + private boolean hasNullValues(AggregationFunction[] aggregationFunctions) { + for (AggregationFunction aggregationFunction : aggregationFunctions) { + for (ExpressionContext argument : aggregationFunction.getInputExpressions()) { + switch (argument.getType()) { + case IDENTIFIER: + DataSource dataSource = _indexSegment.getDataSource(argument.getIdentifier()); + NullValueVectorReader nullValueVector = dataSource.getNullValueVector(); + if (nullValueVector != null && !nullValueVector.getNullBitmap().isEmpty()) { + return true; + } + break; + case LITERAL: + if (argument.getLiteral().isNull()) { + return true; + } + break; + case FUNCTION: + default: + return true; + } + } + } + return false; + } + /** * Returns {@code true} if the given aggregations can be solved with dictionary or column metadata, {@code false} * otherwise. */ private static boolean isFitForNonScanBasedPlan(AggregationFunction[] aggregationFunctions, IndexSegment indexSegment) { - for (AggregationFunction aggregationFunction : aggregationFunctions) { + for (AggregationFunction aggregationFunction : aggregationFunctions) { if (aggregationFunction.getType() == COUNT) { continue; } - ExpressionContext argument = (ExpressionContext) aggregationFunction.getInputExpressions().get(0); + ExpressionContext argument = aggregationFunction.getInputExpressions().get(0); if (argument.getType() != ExpressionContext.Type.IDENTIFIER) { return false; }