From 3f4d66c3997619e96e52af90d82b9a5b96660a84 Mon Sep 17 00:00:00 2001 From: Sree Charan Manamala Date: Wed, 24 Jul 2024 08:43:22 +0530 Subject: [PATCH] Check for Unsupported Aggregation with Distinct when useApproxCountDistinct is enabled (#16770) * init * add NativelySupportsDistinct * refactor * javadoc * refactor * fix tests * fix drill tests * comments * Update sql/src/test/java/org/apache/druid/sql/calcite/DrillWindowQueryTest.java --------- Co-authored-by: Benedict Jin --- .../ApproxCountDistinctSqlAggregator.java | 1 + .../aggregation/NativelySupportsDistinct.java | 36 +++++++++++++++++++ .../builtin/ArrayConcatSqlAggregator.java | 2 ++ .../builtin/ArraySqlAggregator.java | 2 ++ .../builtin/StringSqlAggregator.java | 2 ++ .../calcite/planner/DruidSqlValidator.java | 21 +++++++++-- .../druid/sql/calcite/rule/GroupByRules.java | 12 +++++++ .../druid/sql/calcite/CalciteQueryTest.java | 14 ++++++++ .../sql/calcite/DrillWindowQueryTest.java | 4 +-- .../druid/sql/calcite/NotYetSupported.java | 2 +- 10 files changed, 91 insertions(+), 5 deletions(-) create mode 100644 sql/src/main/java/org/apache/druid/sql/calcite/aggregation/NativelySupportsDistinct.java diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/ApproxCountDistinctSqlAggregator.java b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/ApproxCountDistinctSqlAggregator.java index a8bae9698638..7d66eebcad1b 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/ApproxCountDistinctSqlAggregator.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/ApproxCountDistinctSqlAggregator.java @@ -80,6 +80,7 @@ public Aggregation toDruidAggregation( ); } + @NativelySupportsDistinct private static class ApproxCountDistinctSqlAggFunction extends SqlAggFunction { ApproxCountDistinctSqlAggFunction(SqlAggFunction delegate) diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/NativelySupportsDistinct.java b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/NativelySupportsDistinct.java new file mode 100644 index 000000000000..19bbaf8a0f26 --- /dev/null +++ b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/NativelySupportsDistinct.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.aggregation; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +/** + * This annotation is to distinguish {@link org.apache.calcite.sql.SqlAggFunction} + * which supports the distinct aggregation natively + */ +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.TYPE}) +public @interface NativelySupportsDistinct +{ + +} diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/ArrayConcatSqlAggregator.java b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/ArrayConcatSqlAggregator.java index a5e62f5e2a9b..d20999d3afc4 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/ArrayConcatSqlAggregator.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/ArrayConcatSqlAggregator.java @@ -39,6 +39,7 @@ import org.apache.druid.segment.VirtualColumn; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.sql.calcite.aggregation.Aggregation; +import org.apache.druid.sql.calcite.aggregation.NativelySupportsDistinct; import org.apache.druid.sql.calcite.aggregation.SqlAggregator; import org.apache.druid.sql.calcite.expression.DruidExpression; import org.apache.druid.sql.calcite.expression.Expressions; @@ -142,6 +143,7 @@ public Aggregation toDruidAggregation( } } + @NativelySupportsDistinct private static class ArrayConcatAggFunction extends SqlAggFunction { ArrayConcatAggFunction() diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/ArraySqlAggregator.java b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/ArraySqlAggregator.java index efb84dca6251..1045a79870bb 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/ArraySqlAggregator.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/ArraySqlAggregator.java @@ -41,6 +41,7 @@ import org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.sql.calcite.aggregation.Aggregation; +import org.apache.druid.sql.calcite.aggregation.NativelySupportsDistinct; import org.apache.druid.sql.calcite.aggregation.SqlAggregator; import org.apache.druid.sql.calcite.expression.DruidExpression; import org.apache.druid.sql.calcite.expression.Expressions; @@ -165,6 +166,7 @@ public RelDataType inferReturnType(SqlOperatorBinding sqlOperatorBinding) } } + @NativelySupportsDistinct private static class ArrayAggFunction extends SqlAggFunction { private static final ArrayAggReturnTypeInference RETURN_TYPE_INFERENCE = new ArrayAggReturnTypeInference(); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/StringSqlAggregator.java b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/StringSqlAggregator.java index a78b3a7a4797..49469decf996 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/StringSqlAggregator.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/StringSqlAggregator.java @@ -47,6 +47,7 @@ import org.apache.druid.query.filter.SelectorDimFilter; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.sql.calcite.aggregation.Aggregation; +import org.apache.druid.sql.calcite.aggregation.NativelySupportsDistinct; import org.apache.druid.sql.calcite.aggregation.SqlAggregator; import org.apache.druid.sql.calcite.expression.DruidExpression; import org.apache.druid.sql.calcite.expression.Expressions; @@ -226,6 +227,7 @@ public RelDataType inferReturnType(SqlOperatorBinding sqlOperatorBinding) } } + @NativelySupportsDistinct private static class StringAggFunction extends SqlAggFunction { private static final StringAggReturnTypeInference RETURN_TYPE_INFERENCE = new StringAggReturnTypeInference(); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidSqlValidator.java b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidSqlValidator.java index 18638b32afd8..e00a2915a2e8 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidSqlValidator.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/planner/DruidSqlValidator.java @@ -28,6 +28,7 @@ import org.apache.calcite.rel.type.RelRecordType; import org.apache.calcite.runtime.CalciteContextException; import org.apache.calcite.runtime.CalciteException; +import org.apache.calcite.sql.SqlAggFunction; import org.apache.calcite.sql.SqlCall; import org.apache.calcite.sql.SqlIdentifier; import org.apache.calcite.sql.SqlInsert; @@ -65,6 +66,7 @@ import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.Types; import org.apache.druid.segment.column.ValueType; +import org.apache.druid.sql.calcite.aggregation.NativelySupportsDistinct; import org.apache.druid.sql.calcite.expression.builtin.ScalarInArrayOperatorConversion; import org.apache.druid.sql.calcite.parser.DruidSqlIngest; import org.apache.druid.sql.calcite.parser.DruidSqlInsert; @@ -760,8 +762,10 @@ public void validateCall(SqlCall call, SqlValidatorScope scope) throw buildCalciteContextException( StringUtils.format( "The query contains window functions; To run these window functions, specify [%s] in query context.", - PlannerContext.CTX_ENABLE_WINDOW_FNS), - call); + PlannerContext.CTX_ENABLE_WINDOW_FNS + ), + call + ); } } if (call.getKind() == SqlKind.NULLS_FIRST) { @@ -776,6 +780,19 @@ public void validateCall(SqlCall call, SqlValidatorScope scope) throw buildCalciteContextException("ASCENDING ordering with NULLS LAST is not supported!", call); } } + if (plannerContext.getPlannerConfig().isUseApproximateCountDistinct() && isSqlCallDistinct(call)) { + if (call.getOperator().getKind() != SqlKind.COUNT && call.getOperator() instanceof SqlAggFunction) { + if (!call.getOperator().getClass().isAnnotationPresent(NativelySupportsDistinct.class)) { + throw buildCalciteContextException( + StringUtils.format( + "Aggregation [%s] with DISTINCT is not supported when useApproximateCountDistinct is enabled. Run with disabling it.", + call.getOperator().getName() + ), + call + ); + } + } + } super.validateCall(call, scope); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rule/GroupByRules.java b/sql/src/main/java/org/apache/druid/sql/calcite/rule/GroupByRules.java index fecabd00ec39..f0632006d106 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/rule/GroupByRules.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/rule/GroupByRules.java @@ -22,11 +22,13 @@ import org.apache.calcite.rel.core.AggregateCall; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlKind; import org.apache.druid.query.aggregation.AggregatorFactory; import org.apache.druid.query.aggregation.FilteredAggregatorFactory; import org.apache.druid.query.filter.DimFilter; import org.apache.druid.segment.column.RowSignature; import org.apache.druid.sql.calcite.aggregation.Aggregation; +import org.apache.druid.sql.calcite.aggregation.NativelySupportsDistinct; import org.apache.druid.sql.calcite.aggregation.SqlAggregator; import org.apache.druid.sql.calcite.expression.Expressions; import org.apache.druid.sql.calcite.filtration.Filtration; @@ -69,6 +71,16 @@ public static Aggregation translateAggregateCall( return null; } + if (call.isDistinct() && call.getAggregation().getKind() != SqlKind.COUNT) { + if (!call.getAggregation().getClass().isAnnotationPresent(NativelySupportsDistinct.class)) { + plannerContext.setPlanningError( + "Aggregation [%s] with DISTINCT is not supported when useApproximateCountDistinct is enabled. Run with disabling it.", + call.getAggregation().getName() + ); + return null; + } + } + final DimFilter filter; if (call.filterArg >= 0) { diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 29751279339f..7b5749bd8a16 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -15501,6 +15501,20 @@ public void testWindowingErrorWithoutFeatureFlag() assertThat(e, invalidSqlIs("The query contains window functions; To run these window functions, specify [enableWindowing] in query context. (line [1], column [13])")); } + @Test + public void testDistinctSumNotSupportedWithApproximation() + { + DruidException e = assertThrows( + DruidException.class, + () -> testBuilder() + .queryContext(ImmutableMap.of(PlannerConfig.CTX_KEY_USE_APPROXIMATE_COUNT_DISTINCT, true)) + .sql("SELECT sum(distinct m1) from druid.foo") + .run() + ); + + assertThat(e, invalidSqlContains("Aggregation [SUM] with DISTINCT is not supported")); + } + @Test public void testUnSupportedNullsFirst() { diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/DrillWindowQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/DrillWindowQueryTest.java index 24076d1cdbf9..1451d2495c9f 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/DrillWindowQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/DrillWindowQueryTest.java @@ -4426,7 +4426,7 @@ public void test_last_val_lastValFn_39() windowQueryTest(); } - @NotYetSupported(Modes.NOT_ENOUGH_RULES) + @NotYetSupported(Modes.DISTINCT_AGGREGATE_NOT_SUPPORTED) @DrillTest("nestedAggs/emtyOvrCls_7") @Test public void test_nestedAggs_emtyOvrCls_7() @@ -7274,7 +7274,7 @@ public void test_nestedAggs_emtyOvrCls_13() windowQueryTest(); } - @NotYetSupported(Modes.RESULT_MISMATCH) + @NotYetSupported(Modes.DISTINCT_AGGREGATE_NOT_SUPPORTED) @DrillTest("nestedAggs/emtyOvrCls_8") @Test public void test_nestedAggs_emtyOvrCls_8() diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/NotYetSupported.java b/sql/src/test/java/org/apache/druid/sql/calcite/NotYetSupported.java index eaa97be231f9..e5442a2bda24 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/NotYetSupported.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/NotYetSupported.java @@ -77,7 +77,7 @@ enum Modes { // @formatter:off - NOT_ENOUGH_RULES(DruidException.class, "not enough rules"), + DISTINCT_AGGREGATE_NOT_SUPPORTED(DruidException.class, "DISTINCT is not supported"), ERROR_HANDLING(AssertionError.class, "targetPersona: is <[A-Z]+> and category: is <[A-Z_]+> and errorCode: is"), EXPRESSION_NOT_GROUPED(DruidException.class, "Expression '[a-z]+' is not being grouped"), NULLS_FIRST_LAST(DruidException.class, "NULLS (FIRST|LAST)"),