From f557976b4e0513dbfbf89f9070b6e34485de743c Mon Sep 17 00:00:00 2001 From: Sayed Bilal Bari Date: Tue, 2 Jul 2024 16:30:47 -0500 Subject: [PATCH 1/5] Adding GC Metrics Signed-off-by: Sayed Bilal Bari --- .../rapids/tool/benchmarks/Benchmark.scala | 31 +++++++++++++++---- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/Benchmark.scala b/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/Benchmark.scala index f4359c4a6..33f4ca74f 100644 --- a/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/Benchmark.scala +++ b/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/Benchmark.scala @@ -17,7 +17,9 @@ package org.apache.spark.rapids.tool.benchmarks import java.io.{OutputStream, PrintStream} +import java.lang.management.ManagementFactory +import scala.collection.convert.ImplicitConversions.`collection AsScalaIterable` import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.concurrent.duration.NANOSECONDS @@ -101,20 +103,30 @@ class Benchmark( // The results are going to be processor specific so it is useful to include that. out.println(RuntimeUtil.getJVMOSInfo.mkString("\n")) val nameLen = Math.max(40, Math.max(name.length, benchmarks.map(_.name.length).max)) - out.printf(s"%-${nameLen}s %14s %14s %11s %10s\n", - name + ":", "Best Time(ms)", "Avg Time(ms)", "Stdev(ms)", "Relative") + out.printf(s"%-${nameLen}s %14s %14s %11s %10s %10s %10s\n", + name + ":", "Best Time(ms)", "Avg Time(ms)", "Stdev(ms)","Max GC Time(ms)", + "Max GC Count", "Relative") out.println("-" * (nameLen + 80)) results.zip(benchmarks).foreach { case (result, benchmark) => - out.printf(s"%-${nameLen}s %14s %14s %11s %10s\n", + out.printf(s"%-${nameLen}s %14s %14s %11s %10s %10s %10s\n", benchmark.name, "%5.0f" format result.bestMs, "%4.0f" format result.avgMs, "%5.0f" format result.stdevMs, + "%8d" format result.maxGcTime, + "%5d" format result.maxGcCount, "%3.1fX" format (firstBest / result.bestMs)) } out.println() } + private def getGcMetrics: (Any, Any) = { + val gcBeans = ManagementFactory.getGarbageCollectorMXBeans + val gcTime = gcBeans.map(_.getCollectionTime).sum + val gcCount = gcBeans.map(_.getCollectionCount).sum + (gcTime, gcCount) + } + /** * Runs a single function `f` for iters, returning the average time the function took and * the rate of the function. @@ -127,17 +139,23 @@ class Benchmark( val minIters = if (overrideNumIters != 0) overrideNumIters else minNumIters val runTimes = ArrayBuffer[Long]() var totalTime = 0L + var maxGcCount = 0L + var maxGcTime = 0L for (i <- 0 until minIters) { val timer = new ToolsTimer(i) + val (gcTimeBefore, gcCountBefore) = getGcMetrics f(timer) val runTime = timer.totalTime() runTimes += runTime totalTime += runTime - + val (gcTimeAfter, gcCountAfter) = getGcMetrics + maxGcTime = math.max(maxGcTime, gcTimeAfter.asInstanceOf[Long] - gcTimeBefore.asInstanceOf[Long]) + maxGcCount = math.max(maxGcCount, gcCountAfter.asInstanceOf[Long] - gcCountBefore.asInstanceOf[Long]) if (outputPerIteration) { // scalastyle:off println("*"*80) println(s"Iteration $i took ${NANOSECONDS.toMicros(runTime)} microseconds") + println(s"Iteration $i GC time ${gcTimeAfter.asInstanceOf[Long] - gcTimeBefore.asInstanceOf[Long]}") println("*"*80) // scalastyle:on } @@ -153,12 +171,13 @@ class Benchmark( val stdev = if (runTimes.size > 1) { math.sqrt(runTimes.map(time => (time - avg) * (time - avg)).sum / (runTimes.size - 1)) } else 0 - Benchmark.Result(avg / 1000000.0, best / 1000000.0, stdev / 1000000.0) + Benchmark.Result(avg / 1000000.0, best / 1000000.0, stdev / 1000000.0, maxGcTime, maxGcCount) } } object Benchmark { case class Case(name: String, fn: ToolsTimer => Unit, numIters: Int) - case class Result(avgMs: Double, bestMs: Double, stdevMs: Double) + case class Result(avgMs: Double, bestMs: Double, stdevMs: Double, + maxGcTime: Long = 0, maxGcCount: Long = 0) } From 4eb9997c068e4e3deb9c11decec21f2eee4627b4 Mon Sep 17 00:00:00 2001 From: Sayed Bilal Bari Date: Wed, 3 Jul 2024 15:10:33 -0500 Subject: [PATCH 2/5] Review comment changes Signed-off-by: Sayed Bilal Bari --- .../rapids/tool/benchmarks/Benchmark.scala | 52 +++++++++++-------- .../tool/util/MemoryMetricsTracker.scala | 45 ++++++++++++++++ 2 files changed, 74 insertions(+), 23 deletions(-) create mode 100644 core/src/main/scala/org/apache/spark/sql/rapids/tool/util/MemoryMetricsTracker.scala diff --git a/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/Benchmark.scala b/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/Benchmark.scala index 33f4ca74f..8ecf6b574 100644 --- a/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/Benchmark.scala +++ b/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/Benchmark.scala @@ -17,16 +17,14 @@ package org.apache.spark.rapids.tool.benchmarks import java.io.{OutputStream, PrintStream} -import java.lang.management.ManagementFactory -import scala.collection.convert.ImplicitConversions.`collection AsScalaIterable` import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.concurrent.duration.NANOSECONDS import org.apache.commons.io.output.TeeOutputStream -import org.apache.spark.sql.rapids.tool.util.{RuntimeUtil, ToolsTimer} +import org.apache.spark.sql.rapids.tool.util.{MemoryMetricsTracker, RuntimeUtil, ToolsTimer} /** * This code is mostly copied from org.apache.spark.benchmark.BenchmarkBase @@ -103,30 +101,26 @@ class Benchmark( // The results are going to be processor specific so it is useful to include that. out.println(RuntimeUtil.getJVMOSInfo.mkString("\n")) val nameLen = Math.max(40, Math.max(name.length, benchmarks.map(_.name.length).max)) - out.printf(s"%-${nameLen}s %14s %14s %11s %10s %10s %10s\n", + out.printf(s"%-${nameLen}s %14s %14s %11s %20s %20s %20s %20s %20s %10s\n", name + ":", "Best Time(ms)", "Avg Time(ms)", "Stdev(ms)","Max GC Time(ms)", - "Max GC Count", "Relative") - out.println("-" * (nameLen + 80)) + "Max GC Count", "Avg Free Memory(MB)","Avg Used Memory(MB)", "Max Heap Memory(MB)", "Relative") + out.println("-" * (nameLen + 160)) results.zip(benchmarks).foreach { case (result, benchmark) => - out.printf(s"%-${nameLen}s %14s %14s %11s %10s %10s %10s\n", + out.printf(s"%-${nameLen}s %14s %14s %11s %20s %20s %20s %20s %20s %10s\n", benchmark.name, "%5.0f" format result.bestMs, "%4.0f" format result.avgMs, "%5.0f" format result.stdevMs, - "%8d" format result.maxGcTime, - "%5d" format result.maxGcCount, + "%8d" format result.memoryParams.maxGCTime, + "%5d" format result.memoryParams.maxGCTime, + "%5.0f" format result.memoryParams.avgFreeHeapMemory, + "%5.0f" format result.memoryParams.avgUsedHeapMemory, + "%5d" format result.memoryParams.maxHeapMemory, "%3.1fX" format (firstBest / result.bestMs)) } out.println() } - private def getGcMetrics: (Any, Any) = { - val gcBeans = ManagementFactory.getGarbageCollectorMXBeans - val gcTime = gcBeans.map(_.getCollectionTime).sum - val gcCount = gcBeans.map(_.getCollectionCount).sum - (gcTime, gcCount) - } - /** * Runs a single function `f` for iters, returning the average time the function took and * the rate of the function. @@ -139,23 +133,31 @@ class Benchmark( val minIters = if (overrideNumIters != 0) overrideNumIters else minNumIters val runTimes = ArrayBuffer[Long]() var totalTime = 0L + //For tracking maximum GC over iterations var maxGcCount = 0L var maxGcTime = 0L + var totalFreeMemory = 0L + var totalUsedMemory = 0L + var maxHeapMemory = 0L for (i <- 0 until minIters) { val timer = new ToolsTimer(i) - val (gcTimeBefore, gcCountBefore) = getGcMetrics + val memoryTracker = new MemoryMetricsTracker f(timer) val runTime = timer.totalTime() runTimes += runTime totalTime += runTime - val (gcTimeAfter, gcCountAfter) = getGcMetrics - maxGcTime = math.max(maxGcTime, gcTimeAfter.asInstanceOf[Long] - gcTimeBefore.asInstanceOf[Long]) - maxGcCount = math.max(maxGcCount, gcCountAfter.asInstanceOf[Long] - gcCountBefore.asInstanceOf[Long]) + val gcCount = memoryTracker.getTotalGCCount + val gcTime = memoryTracker.getTotalGCTime + maxGcTime = math.max(maxGcTime, gcTime) + maxGcCount = math.max(maxGcCount, gcCount) + totalFreeMemory += memoryTracker.getCurrentFreeHeapMemory + totalUsedMemory += memoryTracker.getCurrentUsedHeapMemory + maxHeapMemory = if (maxHeapMemory == 0L) memoryTracker.getCurrentMaxHeapMemory else maxHeapMemory if (outputPerIteration) { // scalastyle:off println("*"*80) println(s"Iteration $i took ${NANOSECONDS.toMicros(runTime)} microseconds") - println(s"Iteration $i GC time ${gcTimeAfter.asInstanceOf[Long] - gcTimeBefore.asInstanceOf[Long]}") + println(s"Iteration $i GC time $gcTime ms, GC count $gcCount") println("*"*80) // scalastyle:on } @@ -171,13 +173,17 @@ class Benchmark( val stdev = if (runTimes.size > 1) { math.sqrt(runTimes.map(time => (time - avg) * (time - avg)).sum / (runTimes.size - 1)) } else 0 - Benchmark.Result(avg / 1000000.0, best / 1000000.0, stdev / 1000000.0, maxGcTime, maxGcCount) + Benchmark.Result(avg / 1000000.0, best / 1000000.0, stdev / 1000000.0, + JVMMemoryParams(maxGcTime, maxGcCount, maxHeapMemory/ 1000000, + totalUsedMemory / (minIters*1000000), totalFreeMemory / (minIters*1000000))) } } object Benchmark { case class Case(name: String, fn: ToolsTimer => Unit, numIters: Int) + case class JVMMemoryParams( maxGCTime: Long, maxGCCount: Long, maxHeapMemory: Long, + avgUsedHeapMemory: Double, avgFreeHeapMemory: Double) case class Result(avgMs: Double, bestMs: Double, stdevMs: Double, - maxGcTime: Long = 0, maxGcCount: Long = 0) + memoryParams: JVMMemoryParams) } diff --git a/core/src/main/scala/org/apache/spark/sql/rapids/tool/util/MemoryMetricsTracker.scala b/core/src/main/scala/org/apache/spark/sql/rapids/tool/util/MemoryMetricsTracker.scala new file mode 100644 index 000000000..ff618191f --- /dev/null +++ b/core/src/main/scala/org/apache/spark/sql/rapids/tool/util/MemoryMetricsTracker.scala @@ -0,0 +1,45 @@ +package org.apache.spark.sql.rapids.tool.util + +import java.lang.management.ManagementFactory + +import scala.collection.convert.ImplicitConversions.`collection AsScalaIterable` + +/** + * Utility class to track memory metrics. + * This class is used to track memory metrics such as GC count, GC time, + * heap memory usage, etc. + * + */ +class MemoryMetricsTracker { + private val startGCMetrics = getCurrentGCMetrics + + private def getCurrentGCMetrics: (Long, Long) = { + val gcBeans = ManagementFactory.getGarbageCollectorMXBeans + + (gcBeans.map(_.getCollectionCount).sum, + gcBeans.map(_.getCollectionTime).sum) + } + + def getTotalGCCount: Long = { + val (newGcCount:Long, _) = getCurrentGCMetrics + newGcCount - startGCMetrics._1 + } + + def getTotalGCTime: Long = { + val (_, newGcTime:Long) = getCurrentGCMetrics + newGcTime - startGCMetrics._2 + } + + def getCurrentMaxHeapMemory: Long = { + ManagementFactory.getMemoryMXBean.getHeapMemoryUsage.getMax + } + + def getCurrentUsedHeapMemory: Long = { + ManagementFactory.getMemoryMXBean.getHeapMemoryUsage.getUsed + } + + def getCurrentFreeHeapMemory: Long = { + ManagementFactory.getMemoryMXBean.getHeapMemoryUsage.getMax - + ManagementFactory.getMemoryMXBean.getHeapMemoryUsage.getUsed + } +} From f1664df4e155699e1a0b23f0fce1578207d397fc Mon Sep 17 00:00:00 2001 From: Sayed Bilal Bari Date: Wed, 3 Jul 2024 17:06:17 -0500 Subject: [PATCH 3/5] Correcting output format + refactoring Signed-off-by: Sayed Bilal Bari --- .../rapids/tool/benchmarks/Benchmark.scala | 66 +++++++++---------- ...scala => QualificationToolBenchmark.scala} | 17 +++-- .../tool/util/MemoryMetricsTracker.scala | 13 ---- .../sql/rapids/tool/util/RuntimeUtil.scala | 2 +- 4 files changed, 44 insertions(+), 54 deletions(-) rename core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/{QualificationBenchmark.scala => QualificationToolBenchmark.scala} (75%) diff --git a/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/Benchmark.scala b/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/Benchmark.scala index 8ecf6b574..6da9cc6b1 100644 --- a/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/Benchmark.scala +++ b/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/Benchmark.scala @@ -37,7 +37,7 @@ import org.apache.spark.sql.rapids.tool.util.{MemoryMetricsTracker, RuntimeUtil, * This will output the average time to run each function and the rate of each function. */ class Benchmark( - name: String, + name: String = "Benchmarker", valuesPerIteration: Long, minNumIters: Int, warmUpIterations: Int, @@ -100,22 +100,23 @@ class Benchmark( val firstBest = results.head.bestMs // The results are going to be processor specific so it is useful to include that. out.println(RuntimeUtil.getJVMOSInfo.mkString("\n")) + out.println(s"MaxHeapMemory -> ${Runtime.getRuntime.maxMemory()} \n") val nameLen = Math.max(40, Math.max(name.length, benchmarks.map(_.name.length).max)) - out.printf(s"%-${nameLen}s %14s %14s %11s %20s %20s %20s %20s %20s %10s\n", - name + ":", "Best Time(ms)", "Avg Time(ms)", "Stdev(ms)","Max GC Time(ms)", - "Max GC Count", "Avg Free Memory(MB)","Avg Used Memory(MB)", "Max Heap Memory(MB)", "Relative") + out.printf(s"%-${nameLen}s %14s %14s %11s %20s %18s %18s %18s %18s %10s\n", + name + ":", "Best Time(ms)", "Avg Time(ms)", "Stdev(ms)","Avg GC Time(ms)", + "Avg GC Count", "Stdev GC Count","Max GC Time(ms)","Max GC Count", "Relative") out.println("-" * (nameLen + 160)) results.zip(benchmarks).foreach { case (result, benchmark) => - out.printf(s"%-${nameLen}s %14s %14s %11s %20s %20s %20s %20s %20s %10s\n", + out.printf(s"%-${nameLen}s %14s %14s %11s %20s %18s %18s %18s %18s %10s\n", benchmark.name, "%5.0f" format result.bestMs, "%4.0f" format result.avgMs, "%5.0f" format result.stdevMs, - "%8d" format result.memoryParams.maxGCTime, - "%5d" format result.memoryParams.maxGCTime, - "%5.0f" format result.memoryParams.avgFreeHeapMemory, - "%5.0f" format result.memoryParams.avgUsedHeapMemory, - "%5d" format result.memoryParams.maxHeapMemory, + "%5.1f" format result.memoryParams.avgGCTime, + "%5.1f" format result.memoryParams.avgGCCount, + "%5.0f" format result.memoryParams.stdDevGCCount, + "%5d" format result.memoryParams.maxGcTime, + "%5d" format result.memoryParams.maxGCCount, "%3.1fX" format (firstBest / result.bestMs)) } out.println() @@ -132,32 +133,21 @@ class Benchmark( } val minIters = if (overrideNumIters != 0) overrideNumIters else minNumIters val runTimes = ArrayBuffer[Long]() - var totalTime = 0L + val gcCounts = ArrayBuffer[Long]() + val gcTimes = ArrayBuffer[Long]() //For tracking maximum GC over iterations - var maxGcCount = 0L - var maxGcTime = 0L - var totalFreeMemory = 0L - var totalUsedMemory = 0L - var maxHeapMemory = 0L for (i <- 0 until minIters) { val timer = new ToolsTimer(i) val memoryTracker = new MemoryMetricsTracker f(timer) val runTime = timer.totalTime() runTimes += runTime - totalTime += runTime - val gcCount = memoryTracker.getTotalGCCount - val gcTime = memoryTracker.getTotalGCTime - maxGcTime = math.max(maxGcTime, gcTime) - maxGcCount = math.max(maxGcCount, gcCount) - totalFreeMemory += memoryTracker.getCurrentFreeHeapMemory - totalUsedMemory += memoryTracker.getCurrentUsedHeapMemory - maxHeapMemory = if (maxHeapMemory == 0L) memoryTracker.getCurrentMaxHeapMemory else maxHeapMemory + gcCounts += memoryTracker.getTotalGCCount + gcTimes += memoryTracker.getTotalGCTime if (outputPerIteration) { // scalastyle:off println("*"*80) println(s"Iteration $i took ${NANOSECONDS.toMicros(runTime)} microseconds") - println(s"Iteration $i GC time $gcTime ms, GC count $gcCount") println("*"*80) // scalastyle:on } @@ -168,22 +158,30 @@ class Benchmark( println("*"*80) // scalastyle:on assert(runTimes.nonEmpty) - val best = runTimes.min - val avg = runTimes.sum / runTimes.size - val stdev = if (runTimes.size > 1) { - math.sqrt(runTimes.map(time => (time - avg) * (time - avg)).sum / (runTimes.size - 1)) + val bestRuntime = runTimes.min + val avgRuntime = runTimes.sum / runTimes.size + val stdevRunTime = if (runTimes.size > 1) { + math.sqrt(runTimes.map(time => (time - avgRuntime) * + (time - avgRuntime)).sum / (runTimes.size - 1)) } else 0 - Benchmark.Result(avg / 1000000.0, best / 1000000.0, stdev / 1000000.0, - JVMMemoryParams(maxGcTime, maxGcCount, maxHeapMemory/ 1000000, - totalUsedMemory / (minIters*1000000), totalFreeMemory / (minIters*1000000))) + val maxGcCount = gcCounts.max + val stdevGcCount = if (gcCounts.size > 1) { + math.sqrt(gcCounts.map(gc => (gc - maxGcCount) * + (gc - maxGcCount)).sum / (gcCounts.size - 1)) + } else 0 + val avgGcCount = gcCounts.sum / minIters + val avgGcTime = gcTimes.sum / minIters + val maxGcTime = gcTimes.max + Benchmark.Result(avgRuntime / 1000000.0, bestRuntime / 1000000.0, stdevRunTime / 1000000.0, + JVMMemoryParams(avgGcTime, avgGcCount, stdevGcCount, maxGcCount, maxGcTime)) } } object Benchmark { case class Case(name: String, fn: ToolsTimer => Unit, numIters: Int) - case class JVMMemoryParams( maxGCTime: Long, maxGCCount: Long, maxHeapMemory: Long, - avgUsedHeapMemory: Double, avgFreeHeapMemory: Double) + case class JVMMemoryParams( avgGCTime:Double, avgGCCount:Double, + stdDevGCCount: Double, maxGCCount: Long, maxGcTime:Long) case class Result(avgMs: Double, bestMs: Double, stdevMs: Double, memoryParams: JVMMemoryParams) } diff --git a/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/QualificationBenchmark.scala b/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/QualificationToolBenchmark.scala similarity index 75% rename from core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/QualificationBenchmark.scala rename to core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/QualificationToolBenchmark.scala index 17712739f..af1095f89 100644 --- a/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/QualificationBenchmark.scala +++ b/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/QualificationToolBenchmark.scala @@ -27,23 +27,28 @@ import com.nvidia.spark.rapids.tool.qualification.QualificationMain.mainInternal * 2. Write the benchmark code in the runBenchmark method passing relevant arguments * 3. Write benchmarked code inside */ -object QualificationBenchmark extends BenchmarkBase { +object QualificationToolBenchmark extends BenchmarkBase { override def runBenchmarkSuite(iterations: Int, warmUpIterations: Int, outputFormat: String, mainArgs: Array[String]): Unit = { - runBenchmark("QualificationBenchmark") { + runBenchmark("Benchmark_Per_SQL_Arg_Qualification") { val benchmarker = new Benchmark( - "QualificationBenchmark", - 2, + valuesPerIteration = 2, output = output, outputPerIteration = true, warmUpIterations = warmUpIterations, minNumIters = iterations) - benchmarker.addCase("QualificationBenchmark") { _ => + benchmarker.addCase("Enable_Per_SQL_Arg_Qualification") { _ => + val (prefix,suffix) = mainArgs.splitAt(mainArgs.length - 1) + + mainInternal(new QualificationArgs(prefix :+ "--per-sql" :+ suffix.head), + enablePB = true) + } + benchmarker.addCase("Disable_Per_SQL_Arg_Qualification") { _ => mainInternal(new QualificationArgs(mainArgs), - printStdout = true, enablePB = true) + enablePB = true) } benchmarker.run() } diff --git a/core/src/main/scala/org/apache/spark/sql/rapids/tool/util/MemoryMetricsTracker.scala b/core/src/main/scala/org/apache/spark/sql/rapids/tool/util/MemoryMetricsTracker.scala index ff618191f..c796e413c 100644 --- a/core/src/main/scala/org/apache/spark/sql/rapids/tool/util/MemoryMetricsTracker.scala +++ b/core/src/main/scala/org/apache/spark/sql/rapids/tool/util/MemoryMetricsTracker.scala @@ -29,17 +29,4 @@ class MemoryMetricsTracker { val (_, newGcTime:Long) = getCurrentGCMetrics newGcTime - startGCMetrics._2 } - - def getCurrentMaxHeapMemory: Long = { - ManagementFactory.getMemoryMXBean.getHeapMemoryUsage.getMax - } - - def getCurrentUsedHeapMemory: Long = { - ManagementFactory.getMemoryMXBean.getHeapMemoryUsage.getUsed - } - - def getCurrentFreeHeapMemory: Long = { - ManagementFactory.getMemoryMXBean.getHeapMemoryUsage.getMax - - ManagementFactory.getMemoryMXBean.getHeapMemoryUsage.getUsed - } } diff --git a/core/src/main/scala/org/apache/spark/sql/rapids/tool/util/RuntimeUtil.scala b/core/src/main/scala/org/apache/spark/sql/rapids/tool/util/RuntimeUtil.scala index abe2b55cd..d39323c7a 100644 --- a/core/src/main/scala/org/apache/spark/sql/rapids/tool/util/RuntimeUtil.scala +++ b/core/src/main/scala/org/apache/spark/sql/rapids/tool/util/RuntimeUtil.scala @@ -68,7 +68,7 @@ object RuntimeUtil extends Logging { def getJVMOSInfo: Map[String, String] = { Map( "jvm.name" -> System.getProperty("java.vm.name"), - "jvm.version" -> System.getProperty("java.vm.version"), + "jvm.version" -> System.getProperty("java.version"), "os.name" -> System.getProperty("os.name"), "os.version" -> System.getProperty("os.version") ) From 58a7d9f89e869a0ede0bb58661ac46aa26cf44eb Mon Sep 17 00:00:00 2001 From: Sayed Bilal Bari Date: Fri, 5 Jul 2024 13:13:04 -0500 Subject: [PATCH 4/5] Output Formatting Changes Signed-off-by: Sayed Bilal Bari --- .../rapids/tool/benchmarks/Benchmark.scala | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/Benchmark.scala b/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/Benchmark.scala index 6da9cc6b1..36abc3a6c 100644 --- a/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/Benchmark.scala +++ b/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/Benchmark.scala @@ -99,8 +99,14 @@ class Benchmark( val firstBest = results.head.bestMs // The results are going to be processor specific so it is useful to include that. - out.println(RuntimeUtil.getJVMOSInfo.mkString("\n")) - out.println(s"MaxHeapMemory -> ${Runtime.getRuntime.maxMemory()} \n") + val jvmInfo = RuntimeUtil.getJVMOSInfo + out.printf(s"%-26s -> %-30s \n","JVM Name", jvmInfo("jvm.name")) + out.printf(s"%-26s -> %-30s \n","Java Version",jvmInfo("jvm.version")) + out.printf(s"%-26s -> %-30s \n","OS Name",jvmInfo("os.name")) + out.printf(s"%-26s -> %-30s \n","OS Version",jvmInfo("os.version")) + out.printf(s"%-26s -> %-30s \n","MaxHeapMemory", ("%6d" format Runtime.getRuntime.maxMemory()/1024/1024)+"MB") + out.printf(s"%-26s -> %-30s \n","Total Warm Up Iterations","%2d" format warmUpIterations) + out.printf(s"%-26s -> %-30s \n \n","Total Runtime Iterations","%2d" format minNumIters) val nameLen = Math.max(40, Math.max(name.length, benchmarks.map(_.name.length).max)) out.printf(s"%-${nameLen}s %14s %14s %11s %20s %18s %18s %18s %18s %10s\n", name + ":", "Best Time(ms)", "Avg Time(ms)", "Stdev(ms)","Avg GC Time(ms)", @@ -117,7 +123,7 @@ class Benchmark( "%5.0f" format result.memoryParams.stdDevGCCount, "%5d" format result.memoryParams.maxGcTime, "%5d" format result.memoryParams.maxGCCount, - "%3.1fX" format (firstBest / result.bestMs)) + "%3.2fX" format (firstBest / result.bestMs)) } out.println() } @@ -163,12 +169,16 @@ class Benchmark( val stdevRunTime = if (runTimes.size > 1) { math.sqrt(runTimes.map(time => (time - avgRuntime) * (time - avgRuntime)).sum / (runTimes.size - 1)) - } else 0 + } else { + 0 + } val maxGcCount = gcCounts.max val stdevGcCount = if (gcCounts.size > 1) { math.sqrt(gcCounts.map(gc => (gc - maxGcCount) * (gc - maxGcCount)).sum / (gcCounts.size - 1)) - } else 0 + } else { + 0 + } val avgGcCount = gcCounts.sum / minIters val avgGcTime = gcTimes.sum / minIters val maxGcTime = gcTimes.max From 94f7f935c0d3135ca96bd24e06504d13a0130b3b Mon Sep 17 00:00:00 2001 From: Sayed Bilal Bari Date: Fri, 5 Jul 2024 13:59:59 -0500 Subject: [PATCH 5/5] Formatting + Making qual bench single threaded Signed-off-by: Sayed Bilal Bari --- .../spark/rapids/tool/benchmarks/Benchmark.scala | 14 +++++++------- ...scala => SingleThreadedQualToolBenchmark.scala} | 10 +++++----- 2 files changed, 12 insertions(+), 12 deletions(-) rename core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/{QualificationToolBenchmark.scala => SingleThreadedQualToolBenchmark.scala} (87%) diff --git a/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/Benchmark.scala b/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/Benchmark.scala index 36abc3a6c..40dca59a9 100644 --- a/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/Benchmark.scala +++ b/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/Benchmark.scala @@ -100,13 +100,13 @@ class Benchmark( val firstBest = results.head.bestMs // The results are going to be processor specific so it is useful to include that. val jvmInfo = RuntimeUtil.getJVMOSInfo - out.printf(s"%-26s -> %-30s \n","JVM Name", jvmInfo("jvm.name")) - out.printf(s"%-26s -> %-30s \n","Java Version",jvmInfo("jvm.version")) - out.printf(s"%-26s -> %-30s \n","OS Name",jvmInfo("os.name")) - out.printf(s"%-26s -> %-30s \n","OS Version",jvmInfo("os.version")) - out.printf(s"%-26s -> %-30s \n","MaxHeapMemory", ("%6d" format Runtime.getRuntime.maxMemory()/1024/1024)+"MB") - out.printf(s"%-26s -> %-30s \n","Total Warm Up Iterations","%2d" format warmUpIterations) - out.printf(s"%-26s -> %-30s \n \n","Total Runtime Iterations","%2d" format minNumIters) + out.printf(s"%-26s : %s \n","JVM Name", jvmInfo("jvm.name")) + out.printf(s"%-26s : %s \n","Java Version", jvmInfo("jvm.version")) + out.printf(s"%-26s : %s \n","OS Name", jvmInfo("os.name")) + out.printf(s"%-26s : %s \n","OS Version", jvmInfo("os.version")) + out.printf(s"%-26s : %s MB \n","MaxHeapMemory", (Runtime.getRuntime.maxMemory()/1024/1024).toString) + out.printf(s"%-26s : %s \n","Total Warm Up Iterations", warmUpIterations.toString) + out.printf(s"%-26s : %s \n \n","Total Runtime Iterations", minNumIters.toString) val nameLen = Math.max(40, Math.max(name.length, benchmarks.map(_.name.length).max)) out.printf(s"%-${nameLen}s %14s %14s %11s %20s %18s %18s %18s %18s %10s\n", name + ":", "Best Time(ms)", "Avg Time(ms)", "Stdev(ms)","Avg GC Time(ms)", diff --git a/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/QualificationToolBenchmark.scala b/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/SingleThreadedQualToolBenchmark.scala similarity index 87% rename from core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/QualificationToolBenchmark.scala rename to core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/SingleThreadedQualToolBenchmark.scala index af1095f89..12b3f3c6d 100644 --- a/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/QualificationToolBenchmark.scala +++ b/core/src/main/scala/org/apache/spark/rapids/tool/benchmarks/SingleThreadedQualToolBenchmark.scala @@ -27,7 +27,7 @@ import com.nvidia.spark.rapids.tool.qualification.QualificationMain.mainInternal * 2. Write the benchmark code in the runBenchmark method passing relevant arguments * 3. Write benchmarked code inside */ -object QualificationToolBenchmark extends BenchmarkBase { +object SingleThreadedQualToolBenchmark extends BenchmarkBase { override def runBenchmarkSuite(iterations: Int, warmUpIterations: Int, outputFormat: String, @@ -40,14 +40,14 @@ object QualificationToolBenchmark extends BenchmarkBase { outputPerIteration = true, warmUpIterations = warmUpIterations, minNumIters = iterations) + val (prefix,suffix) = mainArgs.splitAt(mainArgs.length - 1) benchmarker.addCase("Enable_Per_SQL_Arg_Qualification") { _ => - val (prefix,suffix) = mainArgs.splitAt(mainArgs.length - 1) - - mainInternal(new QualificationArgs(prefix :+ "--per-sql" :+ suffix.head), + mainInternal(new QualificationArgs(prefix :+ "--per-sql" :+ "--num-threads" + :+ "1" :+ suffix.head), enablePB = true) } benchmarker.addCase("Disable_Per_SQL_Arg_Qualification") { _ => - mainInternal(new QualificationArgs(mainArgs), + mainInternal(new QualificationArgs(prefix :+ "--num-threads" :+ "1" :+ suffix.head), enablePB = true) } benchmarker.run()