Skip to content

Commit

Permalink
added unit test
Browse files Browse the repository at this point in the history
Signed-off-by: cindyyuanjiang <[email protected]>
  • Loading branch information
cindyyuanjiang committed Dec 12, 2023
1 parent 9a05aa6 commit 6da5607
Showing 1 changed file with 67 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1579,6 +1579,73 @@ class AutoTunerSuite extends FunSuite with BeforeAndAfterEach with Logging {
assert(expectedResults == autoTunerOutput)
}

test("AQE configuration autoBroadcastJoinThreshold should not be set >= 100mb") {
  // User-provided cluster properties that seed the worker-info template.
  val customProps = mutable.LinkedHashMap(
    "spark.executor.cores" -> "8",
    "spark.executor.memory" -> "47222m",
    "spark.rapids.sql.concurrentGpuTasks" -> "2",
    "spark.task.resource.gpu.amount" -> "0.0625")
  // Mock the properties loaded from the eventLog. The key under test is
  // 'spark.sql.adaptive.autoBroadcastJoinThreshold' set to 500mb, which is
  // above the 100m limit the AutoTuner should warn about.
  // NOTE: the original test listed "spark.executor.instances" -> "1" twice;
  // the duplicate entry was redundant (same key, same value) and is removed.
  val logEventsProps: mutable.Map[String, String] =
    mutable.LinkedHashMap[String, String](
      "spark.executor.cores" -> "16",
      "spark.executor.instances" -> "1",
      "spark.executor.memory" -> "80g",
      "spark.executor.resource.gpu.amount" -> "1",
      "spark.sql.shuffle.partitions" -> "200",
      "spark.sql.files.maxPartitionBytes" -> "1g",
      "spark.task.resource.gpu.amount" -> "0.0625",
      "spark.executor.memoryOverhead" -> "7372m",
      "spark.rapids.memory.pinnedPool.size" -> "5g",
      "spark.rapids.sql.enabled" -> "true",
      "spark.plugins" -> "com.nvidia.spark.SQLPlugin",
      "spark.rapids.sql.concurrentGpuTasks" -> "4",
      "spark.sql.adaptive.enabled" -> "true",
      "spark.sql.adaptive.autoBroadcastJoinThreshold" -> "500mb")
  // Build a Dataproc-style worker description (32 cores, ~208GiB RAM,
  // 4 x Tesla T4 with ~15GiB each) and a mock app-info provider.
  val dataprocWorkerInfo = buildWorkerInfoAsString(Some(customProps), Some(32),
    Some("212992MiB"), Some(5), Some(4), Some("15109MiB"), Some("Tesla T4"))
  val infoProvider = getMockInfoProvider(8126464.0, Seq(0), Seq(0.004), logEventsProps,
    Some(defaultSparkVersion))
  val autoTuner: AutoTuner = AutoTuner.buildAutoTunerFromProps(dataprocWorkerInfo, infoProvider)
  val (properties, comments) = autoTuner.getRecommendedProperties()
  val autoTunerOutput = Profiler.getAutoTunerResultsAsString(properties, comments)
  // scalastyle:off line.size.limit
  // The expected output must contain the performance-regression warning for
  // the oversized autoBroadcastJoinThreshold value.
  val expectedResults =
    s"""|
        |Spark Properties:
        |--conf spark.executor.cores=8
        |--conf spark.executor.instances=20
        |--conf spark.executor.memory=16384m
        |--conf spark.executor.memoryOverhead=6758m
        |--conf spark.rapids.memory.pinnedPool.size=4096m
        |--conf spark.rapids.shuffle.multiThreaded.reader.threads=8
        |--conf spark.rapids.shuffle.multiThreaded.writer.threads=8
        |--conf spark.rapids.sql.batchSizeBytes=2147483647
        |--conf spark.rapids.sql.concurrentGpuTasks=2
        |--conf spark.rapids.sql.multiThreadedRead.numThreads=20
        |--conf spark.shuffle.manager=com.nvidia.spark.rapids.spark311.RapidsShuffleManager
        |--conf spark.sql.adaptive.advisoryPartitionSizeInBytes=128m
        |--conf spark.sql.adaptive.coalescePartitions.minPartitionNum=160
        |--conf spark.sql.files.maxPartitionBytes=4096m
        |--conf spark.task.resource.gpu.amount=0.125
        |
        |Comments:
        |- 'spark.rapids.shuffle.multiThreaded.reader.threads' was not set.
        |- 'spark.rapids.shuffle.multiThreaded.writer.threads' was not set.
        |- 'spark.rapids.sql.batchSizeBytes' was not set.
        |- 'spark.rapids.sql.multiThreadedRead.numThreads' was not set.
        |- 'spark.shuffle.manager' was not set.
        |- 'spark.sql.adaptive.advisoryPartitionSizeInBytes' was not set.
        |- 'spark.sql.adaptive.coalescePartitions.minPartitionNum' was not set.
        |- ${AutoTuner.classPathComments("rapids.jars.missing")}
        |- Setting 'spark.sql.adaptive.autoBroadcastJoinThreshold' > 100m could lead to performance regression. Should be set to a lower number.
        |- ${AutoTuner.classPathComments("rapids.shuffle.jars")}
        |""".stripMargin
  // scalastyle:on line.size.limit
  assert(expectedResults == autoTunerOutput)
}

private def testPartitionConfigurations(
inputSize: Double,
shuffleRead: Double,
Expand Down

0 comments on commit 6da5607

Please sign in to comment.