diff --git a/core/src/main/resources/operatorsScore-databricks-aws.csv b/core/src/main/resources/operatorsScore-databricks-aws.csv index b80514227..06a6b7d2d 100644 --- a/core/src/main/resources/operatorsScore-databricks-aws.csv +++ b/core/src/main/resources/operatorsScore-databricks-aws.csv @@ -75,6 +75,7 @@ CollectSet,2.45 Concat,2.45 ConcatWs,2.45 Contains,2.45 +Conv,2.45 Cos,2.45 Cosh,2.45 Cot,2.45 diff --git a/core/src/main/resources/operatorsScore-databricks-azure.csv b/core/src/main/resources/operatorsScore-databricks-azure.csv index efc7b7215..def5b1578 100644 --- a/core/src/main/resources/operatorsScore-databricks-azure.csv +++ b/core/src/main/resources/operatorsScore-databricks-azure.csv @@ -75,6 +75,7 @@ CollectSet,2.73 Concat,2.73 ConcatWs,2.73 Contains,2.73 +Conv,2.73 Cos,2.73 Cosh,2.73 Cot,2.73 diff --git a/core/src/main/resources/operatorsScore-dataproc-l4.csv b/core/src/main/resources/operatorsScore-dataproc-l4.csv index 0fdaea9d2..1231b196a 100644 --- a/core/src/main/resources/operatorsScore-dataproc-l4.csv +++ b/core/src/main/resources/operatorsScore-dataproc-l4.csv @@ -75,6 +75,7 @@ CollectSet,4.16 Concat,4.16 ConcatWs,4.16 Contains,4.16 +Conv,4.16 Cos,4.16 Cosh,4.16 Cot,4.16 diff --git a/core/src/main/resources/operatorsScore-dataproc-t4.csv b/core/src/main/resources/operatorsScore-dataproc-t4.csv index 48ab44cab..d91bc9827 100644 --- a/core/src/main/resources/operatorsScore-dataproc-t4.csv +++ b/core/src/main/resources/operatorsScore-dataproc-t4.csv @@ -75,6 +75,7 @@ CollectSet,4.88 Concat,4.88 ConcatWs,4.88 Contains,4.88 +Conv,4.88 Cos,4.88 Cosh,4.88 Cot,4.88 diff --git a/core/src/main/resources/operatorsScore-emr-a10.csv b/core/src/main/resources/operatorsScore-emr-a10.csv index ba697fb92..3c58aa652 100644 --- a/core/src/main/resources/operatorsScore-emr-a10.csv +++ b/core/src/main/resources/operatorsScore-emr-a10.csv @@ -75,6 +75,7 @@ CollectSet,2.59 Concat,2.59 ConcatWs,2.59 Contains,2.59 +Conv,2.59 Cos,2.59 Cosh,2.59 Cot,2.59 diff --git 
a/core/src/main/resources/operatorsScore-emr-t4.csv b/core/src/main/resources/operatorsScore-emr-t4.csv index 1662703c8..3630cb5d2 100644 --- a/core/src/main/resources/operatorsScore-emr-t4.csv +++ b/core/src/main/resources/operatorsScore-emr-t4.csv @@ -75,6 +75,7 @@ CollectSet,2.07 Concat,2.07 ConcatWs,2.07 Contains,2.07 +Conv,2.07 Cos,2.07 Cosh,2.07 Cot,2.07 diff --git a/core/src/main/resources/operatorsScore.csv b/core/src/main/resources/operatorsScore.csv index 7c00bb79b..b851700b7 100644 --- a/core/src/main/resources/operatorsScore.csv +++ b/core/src/main/resources/operatorsScore.csv @@ -80,6 +80,7 @@ CollectSet,4 Concat,4 ConcatWs,4 Contains,4 +Conv,4 Cos,4 Cosh,4 Cot,4 diff --git a/core/src/main/resources/supportedExprs.csv b/core/src/main/resources/supportedExprs.csv index 7358bb832..1de488209 100644 --- a/core/src/main/resources/supportedExprs.csv +++ b/core/src/main/resources/supportedExprs.csv @@ -127,6 +127,10 @@ ConcatWs,S,`concat_ws`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA Contains,S, ,None,project,src,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA Contains,S, ,None,project,search,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA Contains,S, ,None,project,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Conv,NS,`conv`,This is disabled by default because GPU implementation is incomplete. We currently only support from/to_base values of 10 and 16. We fall back on CPU if the signed conversion is signalled via a negative to_base. GPU implementation does not check for an 64-bit signed/unsigned int overflow when performing the conversion to return `FFFFFFFFFFFFFFFF` or `18446744073709551615` or to throw an error in the ANSI mode. It is safe to enable if the overflow is not possible or detected externally. For instance decimal strings not longer than 18 characters / hexadecimal strings not longer than 15 characters disregarding the sign cannot cause an overflow. 
,project,num,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA +Conv,NS,`conv`,This is disabled by default because GPU implementation is incomplete. We currently only support from/to_base values of 10 and 16. We fall back on CPU if the signed conversion is signalled via a negative to_base. GPU implementation does not check for an 64-bit signed/unsigned int overflow when performing the conversion to return `FFFFFFFFFFFFFFFF` or `18446744073709551615` or to throw an error in the ANSI mode. It is safe to enable if the overflow is not possible or detected externally. For instance decimal strings not longer than 18 characters / hexadecimal strings not longer than 15 characters disregarding the sign cannot cause an overflow. ,project,from_base,NA,PS,PS,PS,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Conv,NS,`conv`,This is disabled by default because GPU implementation is incomplete. We currently only support from/to_base values of 10 and 16. We fall back on CPU if the signed conversion is signalled via a negative to_base. GPU implementation does not check for an 64-bit signed/unsigned int overflow when performing the conversion to return `FFFFFFFFFFFFFFFF` or `18446744073709551615` or to throw an error in the ANSI mode. It is safe to enable if the overflow is not possible or detected externally. For instance decimal strings not longer than 18 characters / hexadecimal strings not longer than 15 characters disregarding the sign cannot cause an overflow. ,project,to_base,NA,PS,PS,PS,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA +Conv,NS,`conv`,This is disabled by default because GPU implementation is incomplete. We currently only support from/to_base values of 10 and 16. We fall back on CPU if the signed conversion is signalled via a negative to_base. GPU implementation does not check for an 64-bit signed/unsigned int overflow when performing the conversion to return `FFFFFFFFFFFFFFFF` or `18446744073709551615` or to throw an error in the ANSI mode. 
// Generates an event log for a query that applies `conv` to a column read back from
// parquet, parses the recorded SQL plans, and hands the Project execs found under the
// WholeStageCodegen node to assertSizeAndSupported with an expected size of 1.
test("conv is supported in ProjectExec") {
  TrampolineUtil.withTempDir { parquetDir =>
    TrampolineUtil.withTempDir { logDir =>
      // The write and the read-back below both use this one location.
      val parquetPath = s"$parquetDir/testtext"
      val (eventLog, _) =
        ToolTestUtils.generateEventLog(logDir, "ProjectExprsSupported") { spark =>
          import spark.implicits._
          // Round-trip the rows through parquet so the plan is not a plain
          // LocalTableScan and the select below becomes a ProjectExec.
          Seq("10", "-10", "-1", "FFFFFFFFFFFFFFFF")
            .toDF("value")
            .write
            .parquet(parquetPath)
          val input = spark.read.parquet(parquetPath)
          // conv is the only expression in the projection of the returned DataFrame.
          input.select(conv(input("value"), 16, 10))
        }
      val checker = new PluginTypeChecker()
      val app = createAppFromEventlog(eventLog)
      // Two SQL plans are expected in the log (presumably the parquet write and the
      // select -- confirm against generateEventLog).
      assert(app.sqlPlans.size == 2)
      val parsedPlans = app.sqlPlans.map { case (id, planInfo) =>
        SQLPlanParser.parseSQLPlan(app.appId, planInfo, id, "", checker, app)
      }
      val wholeStageExecs =
        getAllExecsFromPlan(parsedPlans.toSeq).filter(_.exec.contains("WholeStageCodegen"))
      assert(wholeStageExecs.size == 1)
      assert(wholeStageExecs.forall(_.duration.nonEmpty))
      // Only the children of the WholeStageCodegen node are searched for Project.
      val projectExecs =
        wholeStageExecs.flatMap(_.children).flatten.filter(_.exec == "Project")
      assertSizeAndSupported(1, projectExecs)
    }
  }
}