diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/aggregate/GpuHyperLogLogPlusPlus.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/aggregate/GpuHyperLogLogPlusPlus.scala index fe801955535..d43b6b54f58 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/aggregate/GpuHyperLogLogPlusPlus.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/aggregate/GpuHyperLogLogPlusPlus.scala @@ -23,7 +23,7 @@ import ai.rapids.cudf.{DType, GroupByAggregation, ReductionAggregation} import com.nvidia.spark.rapids._ import com.nvidia.spark.rapids.Arm.withResourceIfAllowed import com.nvidia.spark.rapids.RapidsPluginImplicits.ReallyAGpuExpression -import com.nvidia.spark.rapids.jni.HLLPP +import com.nvidia.spark.rapids.jni.HLLPPHostUDF import com.nvidia.spark.rapids.shims.ShimExpression import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression} @@ -35,9 +35,13 @@ case class CudfHLLPP(override val dataType: DataType, precision: Int) extends CudfAggregate { override lazy val reductionAggregate: cudf.ColumnVector => cudf.Scalar = (input: cudf.ColumnVector) => input.reduce( - ReductionAggregation.HLLPP(precision), DType.STRUCT) + ReductionAggregation.hostUDF( + HLLPPHostUDF.createHLLPPHostUDF(HLLPPHostUDF.AggregationType.Reduction, precision)), + DType.STRUCT) override lazy val groupByAggregate: GroupByAggregation = - GroupByAggregation.HLLPP(precision) + GroupByAggregation.hostUDF( + HLLPPHostUDF.createHLLPPHostUDF(HLLPPHostUDF.AggregationType.GroupBy, precision) + ) override val name: String = "CudfHyperLogLogPlusPlus" } @@ -45,10 +49,14 @@ case class CudfMergeHLLPP(override val dataType: DataType, precision: Int) extends CudfAggregate { override lazy val reductionAggregate: cudf.ColumnVector => cudf.Scalar = - (input: cudf.ColumnVector) => - input.reduce(ReductionAggregation.mergeHLL(precision), DType.STRUCT) + (input: cudf.ColumnVector) => input.reduce( + ReductionAggregation.hostUDF( + HLLPPHostUDF.createHLLPPHostUDF(HLLPPHostUDF.AggregationType.Reduction_MERGE, precision)), + DType.STRUCT) override lazy val groupByAggregate: GroupByAggregation = - GroupByAggregation.mergeHLL(precision) + GroupByAggregation.hostUDF( + HLLPPHostUDF.createHLLPPHostUDF(HLLPPHostUDF.AggregationType.GroupByMerge, precision) + ) override val name: String = "CudfMergeHyperLogLogPlusPlus" } @@ -69,7 +77,7 @@ case class GpuHyperLogLogPlusPlusEvaluation(childExpr: Expression, override def columnarEval(batch: ColumnarBatch): GpuColumnVector = { withResourceIfAllowed(childExpr.columnarEval(batch)) { sketches => - val distinctValues = HLLPP.estimateDistinctValueFromSketches( + val distinctValues = HLLPPHostUDF.estimateDistinctValueFromSketches( sketches.getBase, precision) GpuColumnVector.from(distinctValues, LongType) } diff --git a/tools/generated_files/330/operatorsScore.csv b/tools/generated_files/330/operatorsScore.csv index e5978fb9f1a..bf2cf533481 100644 --- a/tools/generated_files/330/operatorsScore.csv +++ b/tools/generated_files/330/operatorsScore.csv @@ -139,6 +139,7 @@ HiveGenericUDF,4 HiveHash,4 HiveSimpleUDF,4 Hour,4 +HyperLogLogPlusPlus,4 Hypot,4 If,4 In,4 diff --git a/tools/generated_files/330/supportedExprs.csv b/tools/generated_files/330/supportedExprs.csv index fcea9c8cb40..77a76ca487d 100644 --- a/tools/generated_files/330/supportedExprs.csv +++ b/tools/generated_files/330/supportedExprs.csv @@ -707,6 +707,10 @@ First,S,`first_value`; `first`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,N First,S,`first_value`; `first`,None,reduction,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS First,S,`first_value`; `first`,None,window,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS First,S,`first_value`; `first`,None,window,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS +HyperLogLogPlusPlus,S,`approx_count_distinct`,None,aggregation,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,NS,NS,NS,NS,NS,NS,NS +HyperLogLogPlusPlus,S,`approx_count_distinct`,None,aggregation,result,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +HyperLogLogPlusPlus,S,`approx_count_distinct`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,NS,S,NS,NS,NS,NS,NS,NS,NS +HyperLogLogPlusPlus,S,`approx_count_distinct`,None,reduction,result,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA Last,S,`last_value`; `last`,None,aggregation,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS Last,S,`last_value`; `last`,None,aggregation,result,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS Last,S,`last_value`; `last`,None,reduction,input,S,S,S,S,S,S,S,S,PS,S,S,S,S,NS,PS,PS,PS,NS,NS,NS