From 18b0472af9ffcf77e43fe5726e91b74ecab0637f Mon Sep 17 00:00:00 2001 From: Ahmed Hussein <50450311+amahussein@users.noreply.github.com> Date: Wed, 18 Dec 2024 11:31:32 -0600 Subject: [PATCH] Optimize implementation of getAggregateRawMetrics in core-tools (#1468) * Optimize implementation of getAggregateRawMetrics in core-tools * address reviews and fix issues in aggregateDiagnostic Contributes to #1461 This commit improves the implementation of aggregation across raw metrics by replacing the built-in Scala collections with accumulators. --------- Signed-off-by: Ahmed Hussein (amahussein) --- .../analysis/AppSparkMetricsAnalyzer.scala | 293 ++++++++---------- .../tool/analysis/util/AggAccumHelper.scala | 59 ++++ .../analysis/util/AggAccumPhotonHelper.scala | 31 ++ .../tool/analysis/util/JobAggAccum.scala | 31 ++ .../tool/analysis/util/SQLAggAccum.scala | 42 +++ .../tool/analysis/util/StageAggAccum.scala | 31 ++ .../tool/analysis/util/StageAggPhoton.scala | 54 ++++ .../analysis/util/TaskMetricsAccumRec.scala | 157 ++++++++++ .../sql/rapids/tool/store/AccumMetaRef.scala | 1 + .../sql/rapids/tool/store/AccumNameRef.scala | 2 +- 10 files changed, 529 insertions(+), 172 deletions(-) create mode 100644 core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/AggAccumHelper.scala create mode 100644 core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/AggAccumPhotonHelper.scala create mode 100644 core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/JobAggAccum.scala create mode 100644 core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/SQLAggAccum.scala create mode 100644 core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/StageAggAccum.scala create mode 100644 core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/StageAggPhoton.scala create mode 100644 core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/TaskMetricsAccumRec.scala diff --git 
a/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAnalyzer.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAnalyzer.scala index 33194644e..3a862097b 100644 --- a/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAnalyzer.scala +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/AppSparkMetricsAnalyzer.scala @@ -16,17 +16,16 @@ package com.nvidia.spark.rapids.tool.analysis -import java.util.concurrent.TimeUnit - import scala.collection.mutable.{ArrayBuffer, HashMap, LinkedHashMap} import com.nvidia.spark.rapids.tool.analysis.StageAccumDiagnosticMetrics._ +import com.nvidia.spark.rapids.tool.analysis.util.{AggAccumHelper, AggAccumPhotonHelper} import com.nvidia.spark.rapids.tool.planparser.DatabricksParseHelper -import com.nvidia.spark.rapids.tool.profiling.{AccumProfileResults, IOAnalysisProfileResult, JobAggTaskMetricsProfileResult, ShuffleSkewProfileResult, SQLDurationExecutorTimeProfileResult, SQLMaxTaskInputSizes, SQLTaskAggMetricsProfileResult, StageAggTaskMetricsProfileResult, StageDiagnosticResult} +import com.nvidia.spark.rapids.tool.profiling._ import org.apache.spark.sql.rapids.tool.{AppBase, ToolUtils} import org.apache.spark.sql.rapids.tool.profiling.ApplicationInfo -import org.apache.spark.sql.rapids.tool.store.{AccumInfo, AccumMetaRef, AccumNameRef, TaskModel} +import org.apache.spark.sql.rapids.tool.store.{AccumInfo, AccumMetaRef} /** * Does analysis on the DataFrames from object of AppBase. 
@@ -84,52 +83,47 @@ class AppSparkMetricsAnalyzer(app: AppBase) extends AppAnalysisBase(app) { if (jc.stageIds.isEmpty) { None } else { - val profResultsInJob = stageLevelSparkMetrics(index).filterKeys(jc.stageIds.contains).values - if (profResultsInJob.isEmpty) { + val jobAggAccumulator = new AggAccumHelper() + val perJobRec = jobAggAccumulator.accumPerJob( + jc.stageIds.collect { + case stageId if stageLevelSparkMetrics(index).contains(stageId) => + stageLevelSparkMetrics(index)(stageId) + }) + if (perJobRec.isEmptyAggregates) { None } else { - // Recalculate the duration sum, max, min, avg for the job based on the cached - // stage Profiling results - val tasksInJob = profResultsInJob.map(_.numTasks).sum - val durSum = profResultsInJob.map(_.durationSum).sum - val durMax = - AppSparkMetricsAnalyzer.maxWithEmptyHandling(profResultsInJob.map(_.durationMax)) - val durMin = - AppSparkMetricsAnalyzer.minWithEmptyHandling(profResultsInJob.map(_.durationMin)) - val durAvg = ToolUtils.calculateAverage(durSum, tasksInJob, 1) Some(JobAggTaskMetricsProfileResult(index, id, - tasksInJob, + perJobRec.numTasks, jc.duration, - profResultsInJob.map(_.diskBytesSpilledSum).sum, - durSum, - durMax, - durMin, - durAvg, - profResultsInJob.map(_.executorCPUTimeSum).sum, - profResultsInJob.map(_.executorDeserializeCpuTimeSum).sum, - profResultsInJob.map(_.executorDeserializeTimeSum).sum, - profResultsInJob.map(_.executorRunTimeSum).sum, - profResultsInJob.map(_.inputBytesReadSum).sum, - profResultsInJob.map(_.inputRecordsReadSum).sum, - profResultsInJob.map(_.jvmGCTimeSum).sum, - profResultsInJob.map(_.memoryBytesSpilledSum).sum, - profResultsInJob.map(_.outputBytesWrittenSum).sum, - profResultsInJob.map(_.outputRecordsWrittenSum).sum, - AppSparkMetricsAnalyzer.maxWithEmptyHandling( - profResultsInJob.map(_.peakExecutionMemoryMax)), - profResultsInJob.map(_.resultSerializationTimeSum).sum, - AppSparkMetricsAnalyzer.maxWithEmptyHandling(profResultsInJob.map(_.resultSizeMax)), - 
profResultsInJob.map(_.srFetchWaitTimeSum).sum, - profResultsInJob.map(_.srLocalBlocksFetchedSum).sum, - profResultsInJob.map(_.srcLocalBytesReadSum).sum, - profResultsInJob.map(_.srRemoteBlocksFetchSum).sum, - profResultsInJob.map(_.srRemoteBytesReadSum).sum, - profResultsInJob.map(_.srRemoteBytesReadToDiskSum).sum, - profResultsInJob.map(_.srTotalBytesReadSum).sum, - profResultsInJob.map(_.swBytesWrittenSum).sum, - profResultsInJob.map(_.swRecordsWrittenSum).sum, - profResultsInJob.map(_.swWriteTimeSum).sum)) + perJobRec.diskBytesSpilledSum, + perJobRec.durationSum, + perJobRec.durationMax, + perJobRec.durationMin, + perJobRec.durationAvg, + perJobRec.executorCPUTimeSum, + perJobRec.executorDeserializeCpuTimeSum, + perJobRec.executorDeserializeTimeSum, + perJobRec.executorRunTimeSum, + perJobRec.inputBytesReadSum, + perJobRec.inputRecordsReadSum, + perJobRec.jvmGCTimeSum, + perJobRec.memoryBytesSpilledSum, + perJobRec.outputBytesWrittenSum, + perJobRec.outputRecordsWrittenSum, + perJobRec.peakExecutionMemoryMax, + perJobRec.resultSerializationTimeSum, + perJobRec.resultSizeMax, + perJobRec.srFetchWaitTimeSum, + perJobRec.srLocalBlocksFetchedSum, + perJobRec.srLocalBytesReadSum, + perJobRec.srRemoteBlocksFetchSum, + perJobRec.srRemoteBytesReadSum, + perJobRec.srRemoteBytesReadToDiskSum, + perJobRec.srTotalBytesReadSum, + perJobRec.swBytesWrittenSum, + perJobRec.swRecordsWrittenSum, + perJobRec.swWriteTimeSum)) } } } @@ -182,66 +176,55 @@ class AppSparkMetricsAnalyzer(app: AppBase) extends AppAnalysisBase(app) { if (app.sqlIdToStages.contains(sqlId)) { val stagesInSQL = app.sqlIdToStages(sqlId) // TODO: Should we only consider successful tasks? 
- val cachedResBySQL = stageLevelSparkMetrics(index).filterKeys(stagesInSQL.contains).values - if (cachedResBySQL.isEmpty) { + val sqlAggAccumulator = new AggAccumHelper() + val preSqlRec = sqlAggAccumulator.accumPerSQL( + stagesInSQL.collect { + case stageId if stageLevelSparkMetrics(index).contains(stageId) => + stageLevelSparkMetrics(index)(stageId) + }) + if (preSqlRec.isEmptyAggregates) { None } else { - // Recalculate the duration sum, max, min, avg for the job based on the cached - // stage Profiling results - val tasksInSql = cachedResBySQL.map(_.numTasks).sum - val durSum = cachedResBySQL.map(_.durationSum).sum - val durMax = - AppSparkMetricsAnalyzer.maxWithEmptyHandling(cachedResBySQL.map(_.durationMax)) - val durMin = - AppSparkMetricsAnalyzer.minWithEmptyHandling(cachedResBySQL.map(_.durationMin)) - val durAvg = ToolUtils.calculateAverage(durSum, tasksInSql, 1) - val diskBytes = cachedResBySQL.map(_.diskBytesSpilledSum).sum - val execCpuTime = cachedResBySQL.map(_.executorCPUTimeSum).sum - val execRunTime = cachedResBySQL.map(_.executorRunTimeSum).sum - val execCPURatio = ToolUtils.calculateDurationPercent(execCpuTime, execRunTime) - val inputBytesRead = cachedResBySQL.map(_.inputBytesReadSum).sum // set this here, so make sure we don't get it again until later - sqlCase.sqlCpuTimePercent = execCPURatio - + sqlCase.sqlCpuTimePercent = preSqlRec.executorCpuRatio Some(SQLTaskAggMetricsProfileResult(index, app.appId, sqlId, sqlCase.description, - tasksInSql, + preSqlRec.numTasks, sqlCase.duration, - execCpuTime, - execRunTime, - execCPURatio, - diskBytes, - durSum, - durMax, - durMin, - durAvg, - execCpuTime, - cachedResBySQL.map(_.executorDeserializeCpuTimeSum).sum, - cachedResBySQL.map(_.executorDeserializeTimeSum).sum, - execRunTime, - inputBytesRead, - inputBytesRead * 1.0 / tasksInSql, - cachedResBySQL.map(_.inputRecordsReadSum).sum, - cachedResBySQL.map(_.jvmGCTimeSum).sum, - cachedResBySQL.map(_.memoryBytesSpilledSum).sum, - 
cachedResBySQL.map(_.outputBytesWrittenSum).sum, - cachedResBySQL.map(_.outputRecordsWrittenSum).sum, - AppSparkMetricsAnalyzer.maxWithEmptyHandling( - cachedResBySQL.map(_.peakExecutionMemoryMax)), - cachedResBySQL.map(_.resultSerializationTimeSum).sum, - AppSparkMetricsAnalyzer.maxWithEmptyHandling(cachedResBySQL.map(_.resultSizeMax)), - cachedResBySQL.map(_.srFetchWaitTimeSum).sum, - cachedResBySQL.map(_.srLocalBlocksFetchedSum).sum, - cachedResBySQL.map(_.srcLocalBytesReadSum).sum, - cachedResBySQL.map(_.srRemoteBlocksFetchSum).sum, - cachedResBySQL.map(_.srRemoteBytesReadSum).sum, - cachedResBySQL.map(_.srRemoteBytesReadToDiskSum).sum, - cachedResBySQL.map(_.srTotalBytesReadSum).sum, - cachedResBySQL.map(_.swBytesWrittenSum).sum, - cachedResBySQL.map(_.swRecordsWrittenSum).sum, - cachedResBySQL.map(_.swWriteTimeSum).sum)) + preSqlRec.executorCPUTimeSum, + preSqlRec.executorRunTimeSum, + preSqlRec.executorCpuRatio, + preSqlRec.diskBytesSpilledSum, + preSqlRec.durationSum, + preSqlRec.durationMax, + preSqlRec.durationMin, + preSqlRec.durationAvg, + preSqlRec.executorCPUTimeSum, + preSqlRec.executorDeserializeCpuTimeSum, + preSqlRec.executorDeserializeTimeSum, + preSqlRec.executorRunTimeSum, + preSqlRec.inputBytesReadSum, + preSqlRec.inputBytesReadAvg, + preSqlRec.inputRecordsReadSum, + preSqlRec.jvmGCTimeSum, + preSqlRec.memoryBytesSpilledSum, + preSqlRec.outputBytesWrittenSum, + preSqlRec.outputRecordsWrittenSum, + preSqlRec.peakExecutionMemoryMax, + preSqlRec.resultSerializationTimeSum, + preSqlRec.resultSizeMax, + preSqlRec.srFetchWaitTimeSum, + preSqlRec.srLocalBlocksFetchedSum, + preSqlRec.srLocalBytesReadSum, + preSqlRec.srRemoteBlocksFetchSum, + preSqlRec.srRemoteBytesReadSum, + preSqlRec.srRemoteBytesReadToDiskSum, + preSqlRec.srTotalBytesReadSum, + preSqlRec.swBytesWrittenSum, + preSqlRec.swRecordsWrittenSum, + preSqlRec.swWriteTimeSum)) } } else { None @@ -339,8 +322,9 @@ class AppSparkMetricsAnalyzer(app: AppBase) extends AppAnalysisBase(app) { 
app.asInstanceOf[ApplicationInfo].planMetricProcessor } val zeroAccumProfileResults = - AccumProfileResults(0, 0, AccumMetaRef(0L, AccumNameRef("")), 0L, 0L, 0L, 0L) - + AccumProfileResults(0, 0, AccumMetaRef.EMPTY_ACCUM_META_REF, 0L, 0L, 0L, 0L) + val emptyNodeNames = Seq.empty[String] + val emptyDiagnosticMetrics = HashMap.empty[String, AccumProfileResults] // TODO: this has stage attempts. we should handle different attempts app.stageManager.getAllStages.map { sm => // TODO: Should we only consider successful tasks? @@ -348,11 +332,11 @@ class AppSparkMetricsAnalyzer(app: AppBase) extends AppAnalysisBase(app) { sm.stageInfo.attemptNumber()) // count duplicate task attempts val numTasks = tasksInStage.size - val nodeNames = sqlAnalyzer.stageToNodeNames. - getOrElse(sm.stageInfo.stageId, Seq.empty[String]) - val diagnosticMetricsMap = sqlAnalyzer.stageToDiagnosticMetrics. - getOrElse(sm.stageInfo.stageId, HashMap.empty[String, AccumProfileResults]). - withDefaultValue(zeroAccumProfileResults) + val nodeNames = sqlAnalyzer.stageToNodeNames.getOrElse(sm.stageInfo.stageId, emptyNodeNames) + val diagnosticMetricsMap = + sqlAnalyzer.stageToDiagnosticMetrics + .getOrElse(sm.stageInfo.stageId, emptyDiagnosticMetrics) + .withDefaultValue(zeroAccumProfileResults) val srTotalBytesMetrics = AppSparkMetricsAnalyzer.getStatistics(tasksInStage.map(_.sr_totalBytesRead)) @@ -417,10 +401,8 @@ class AppSparkMetricsAnalyzer(app: AppBase) extends AppAnalysisBase(app) { // TODO: Should we only consider successful tasks? val tasksInStage = app.taskManager.getTasks(sm.stageInfo.stageId, sm.stageInfo.attemptNumber()) - // count duplicate task attempts - val numAttempts = tasksInStage.size - val (peakMemoryMax, shuffleWriteTimeSum) = if (app.isPhoton) { + val accumHelperObj = if (app.isPhoton) { // If this a photon app, use the photonHelper // For max peak memory, we need to look at the accumulators at the task level. 
val peakMemoryValues = tasksInStage.flatMap { taskModel => photonPeakMemoryAccumInfos.flatMap { accumInfo => @@ -431,50 +413,45 @@ class AppSparkMetricsAnalyzer(app: AppBase) extends AppAnalysisBase(app) { val shuffleWriteValues = photonShuffleWriteTimeAccumInfos.flatMap { accumInfo => accumInfo.stageValuesMap.get(sm.stageInfo.stageId) } - (AppSparkMetricsAnalyzer.maxWithEmptyHandling(peakMemoryValues), - TimeUnit.NANOSECONDS.toMillis(shuffleWriteValues.sum)) + new AggAccumPhotonHelper(shuffleWriteValues, peakMemoryValues) } else { // For non-Photon apps, use the task metrics directly. - val peakMemoryValues = tasksInStage.map(_.peakExecutionMemory) - val shuffleWriteTime = tasksInStage.map(_.sw_writeTime) - (AppSparkMetricsAnalyzer.maxWithEmptyHandling(peakMemoryValues), - shuffleWriteTime.sum) + new AggAccumHelper() } - - val (durSum, durMax, durMin, durAvg) = AppSparkMetricsAnalyzer.getDurations(tasksInStage) + val perStageRec = accumHelperObj.accumPerStage(tasksInStage) val stageRow = StageAggTaskMetricsProfileResult(index, sm.stageInfo.stageId, - numAttempts, // TODO: why is this numAttempts and not numTasks? 
+ // numTasks includes duplicate task attempts + perStageRec.numTasks, sm.duration, - tasksInStage.map(_.diskBytesSpilled).sum, - durSum, - durMax, - durMin, - durAvg, - tasksInStage.map(_.executorCPUTime).sum, - tasksInStage.map(_.executorDeserializeCPUTime).sum, - tasksInStage.map(_.executorDeserializeTime).sum, - tasksInStage.map(_.executorRunTime).sum, - tasksInStage.map(_.input_bytesRead).sum, - tasksInStage.map(_.input_recordsRead).sum, - tasksInStage.map(_.jvmGCTime).sum, - tasksInStage.map(_.memoryBytesSpilled).sum, - tasksInStage.map(_.output_bytesWritten).sum, - tasksInStage.map(_.output_recordsWritten).sum, - peakMemoryMax, - tasksInStage.map(_.resultSerializationTime).sum, - AppSparkMetricsAnalyzer.maxWithEmptyHandling(tasksInStage.map(_.resultSize)), - tasksInStage.map(_.sr_fetchWaitTime).sum, - tasksInStage.map(_.sr_localBlocksFetched).sum, - tasksInStage.map(_.sr_localBytesRead).sum, - tasksInStage.map(_.sr_remoteBlocksFetched).sum, - tasksInStage.map(_.sr_remoteBytesRead).sum, - tasksInStage.map(_.sr_remoteBytesReadToDisk).sum, - tasksInStage.map(_.sr_totalBytesRead).sum, - tasksInStage.map(_.sw_bytesWritten).sum, - tasksInStage.map(_.sw_recordsWritten).sum, - shuffleWriteTimeSum - ) + perStageRec.diskBytesSpilledSum, + perStageRec.durationSum, + perStageRec.durationMax, + perStageRec.durationMin, + perStageRec.durationAvg, + perStageRec.executorCPUTimeSum, + perStageRec.executorDeserializeCpuTimeSum, + perStageRec.executorDeserializeTimeSum, + perStageRec.executorRunTimeSum, + perStageRec.inputBytesReadSum, + perStageRec.inputRecordsReadSum, + perStageRec.jvmGCTimeSum, + perStageRec.memoryBytesSpilledSum, + perStageRec.outputBytesWrittenSum, + perStageRec.outputRecordsWrittenSum, + perStageRec.peakExecutionMemoryMax, + perStageRec.resultSerializationTimeSum, + perStageRec.resultSizeMax, + perStageRec.srFetchWaitTimeSum, + perStageRec.srLocalBlocksFetchedSum, + perStageRec.srLocalBytesReadSum, + perStageRec.srRemoteBlocksFetchSum, + 
perStageRec.srRemoteBytesReadSum, + perStageRec.srRemoteBytesReadToDiskSum, + perStageRec.srTotalBytesReadSum, + perStageRec.swBytesWrittenSum, + perStageRec.swRecordsWrittenSum, + perStageRec.swWriteTimeSum) stageLevelSparkMetrics(index).put(sm.stageInfo.stageId, stageRow) } } @@ -482,16 +459,6 @@ class AppSparkMetricsAnalyzer(app: AppBase) extends AppAnalysisBase(app) { object AppSparkMetricsAnalyzer { - def getDurations(tcs: Iterable[TaskModel]): (Long, Long, Long, Double) = { - val durations = tcs.map(_.duration) - if (durations.nonEmpty) { - (durations.sum, durations.max, durations.min, - ToolUtils.calculateAverage(durations.sum, durations.size, 1)) - } else { - (0L, 0L, 0L, 0.toDouble) - } - } - /** * Given an input iterable, returns its min, median, max and sum. */ @@ -509,20 +476,4 @@ object AppSparkMetricsAnalyzer { StatisticsMetrics(sortedArr.head, med, sortedArr(len - 1), sortedArr.sum) } } - - def maxWithEmptyHandling(arr: Iterable[Long]): Long = { - if (arr.isEmpty) { - 0L - } else { - arr.max - } - } - - def minWithEmptyHandling(arr: Iterable[Long]): Long = { - if (arr.isEmpty) { - 0L - } else { - arr.min - } - } } diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/AggAccumHelper.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/AggAccumHelper.scala new file mode 100644 index 000000000..b42ac08b4 --- /dev/null +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/AggAccumHelper.scala @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.tool.analysis.util + +import com.nvidia.spark.rapids.tool.profiling.StageAggTaskMetricsProfileResult + +import org.apache.spark.sql.rapids.tool.store.TaskModel + +/** + * A helper class to facilitate the accumulation of aggregate metrics. + * This is a separate class to allow further customization in the future. For example, + * a parallel processor can be used to split the iterables without changing the caller side. + */ +class AggAccumHelper { + + private def accumCachedRecords[R <: TaskMetricsAccumRec]( + stageRecords: Iterable[StageAggTaskMetricsProfileResult], + rec: R): Unit = { + stageRecords.foreach(rec.addRecord) + rec.finalizeAggregation() + } + + protected def createStageAccumRecord(): TaskMetricsAccumRec = { + StageAggAccum() + } + + def accumPerStage(taskRecords: Iterable[TaskModel]): TaskMetricsAccumRec = { + val resRec = createStageAccumRecord() + taskRecords.foreach(resRec.addRecord) + resRec.finalizeAggregation() + resRec + } + + def accumPerSQL(stageRecords: Iterable[StageAggTaskMetricsProfileResult]): SQLAggAccum = { + val resRec = SQLAggAccum() + accumCachedRecords(stageRecords, resRec) + resRec + } + + def accumPerJob(stageRecords: Iterable[StageAggTaskMetricsProfileResult]): JobAggAccum = { + val resRec = JobAggAccum() + accumCachedRecords(stageRecords, resRec) + resRec + } +} diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/AggAccumPhotonHelper.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/AggAccumPhotonHelper.scala new file mode 
100644 index 000000000..4f1356960 --- /dev/null +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/AggAccumPhotonHelper.scala @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.tool.analysis.util + +/** + * Implementation of AggAccumHelper for Photon. + * It takes the shuffleWriteValues and peakMemValues Accumulables as an argument because those + * values are not available in the TaskModel. + */ +class AggAccumPhotonHelper( + shuffleWriteValues: Iterable[Long], + peakMemValues: Iterable[Long]) extends AggAccumHelper { + + override def createStageAccumRecord(): TaskMetricsAccumRec = { + StageAggPhoton(shuffleWriteValues, peakMemValues) + } +} diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/JobAggAccum.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/JobAggAccum.scala new file mode 100644 index 000000000..a8e5b78db --- /dev/null +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/JobAggAccum.scala @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.tool.analysis.util + +import org.apache.spark.sql.rapids.tool.store.TaskModel + +/** + * Accumulator for Job Aggregates. + * This is an optimization to avoid using the Scala collections API on each field for the entire + * number of tasks/stages in a Job. + */ +case class JobAggAccum() extends TaskMetricsAccumRec { + override def addRecord(rec: TaskModel): Unit = { + throw new UnsupportedOperationException( + "Not implemented: JobAggAccum accepts only cached records") + } +} diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/SQLAggAccum.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/SQLAggAccum.scala new file mode 100644 index 000000000..b8222679f --- /dev/null +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/SQLAggAccum.scala @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.nvidia.spark.rapids.tool.analysis.util + +import org.apache.spark.sql.rapids.tool.ToolUtils +import org.apache.spark.sql.rapids.tool.store.TaskModel + +/** + * Accumulator for SQL Aggregates. + * This is an optimization to avoid using the Scala collections API on each field for the entire + * number of tasks/stages in a SQL. + */ +case class SQLAggAccum( + var executorCpuRatio: Double = 0, + // Not added to the output since it is used only by the AutoTuner + var inputBytesReadAvg: Double = 0) extends TaskMetricsAccumRec { + + override def finalizeAggregation(): Unit = { + super.finalizeAggregation() + executorCpuRatio = ToolUtils.calculateDurationPercent(executorCPUTimeSum, executorRunTimeSum) + inputBytesReadAvg = ToolUtils.calculateAverage(inputBytesReadSum, numTasks, 1) + } + + override def addRecord(rec: TaskModel): Unit = { + throw new UnsupportedOperationException( + "Not implemented: SQLAggAccum accepts only cached records") + } +} diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/StageAggAccum.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/StageAggAccum.scala new file mode 100644 index 000000000..c88f1a77d --- /dev/null +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/StageAggAccum.scala @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.nvidia.spark.rapids.tool.analysis.util + +import com.nvidia.spark.rapids.tool.profiling.StageAggTaskMetricsProfileResult + +/** + * Accumulator for Stage Aggregates. + * This is an optimization to avoid using the Scala collections API on each field for the entire + * number of tasks in a Stage. + */ +case class StageAggAccum() extends TaskMetricsAccumRec { + override def addRecord(rec: StageAggTaskMetricsProfileResult): Unit = { + throw new UnsupportedOperationException("Not implemented: Cannot use cached results to" + + "calculate stage aggregates") + } +} diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/StageAggPhoton.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/StageAggPhoton.scala new file mode 100644 index 000000000..ed7127050 --- /dev/null +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/StageAggPhoton.scala @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.tool.analysis.util + +import java.util.concurrent.TimeUnit + +import com.nvidia.spark.rapids.tool.profiling.StageAggTaskMetricsProfileResult + +/** + * Implementation of Accumulator object for Photon. + * It takes the shuffleWriteValues and peakMemValues Accumulables as an argument because those + * values are not available in the TaskModel. 
+ */ +case class StageAggPhoton( + shuffleWriteValues: Iterable[Long], + peakMemValues: Iterable[Long]) extends TaskMetricsAccumRec { + + override def addRecord(rec: StageAggTaskMetricsProfileResult): Unit = { + throw new UnsupportedOperationException("Not implemented: Cannot use cached results to" + + "calculate stage aggregates") + } + + override def finalizeAggregation(): Unit = { + // Fix the shuffleWriteTimes and the peakMemoryValues to use the shuffleWriteValues and + // the peakMemValues. + swWriteTimeSum = 0 + peakExecutionMemoryMax = 0 + if (!isEmptyAggregates) { + // Re-calculate the photon specific fields only if the accumulator has tasks. + // Otherwise, leave it as 0. + if (shuffleWriteValues.nonEmpty) { + swWriteTimeSum = TimeUnit.NANOSECONDS.toMillis(shuffleWriteValues.sum) + } + if (peakMemValues.nonEmpty) { + peakExecutionMemoryMax = peakMemValues.max + } + } + super.finalizeAggregation() + } +} diff --git a/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/TaskMetricsAccumRec.scala b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/TaskMetricsAccumRec.scala new file mode 100644 index 000000000..b5d98b9ac --- /dev/null +++ b/core/src/main/scala/com/nvidia/spark/rapids/tool/analysis/util/TaskMetricsAccumRec.scala @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.nvidia.spark.rapids.tool.analysis.util + +import com.nvidia.spark.rapids.tool.profiling.StageAggTaskMetricsProfileResult + +import org.apache.spark.sql.rapids.tool.ToolUtils +import org.apache.spark.sql.rapids.tool.store.TaskModel + +/** + * Mutable accumulator that folds task metrics into running sums, maxima and minima. + * Records are added through the addRecord overloads, either one [[TaskModel]] at a time or from + * an already aggregated [[StageAggTaskMetricsProfileResult]]; finalizeAggregation() should be + * called once all records have been added. + * The max fields start at Long.MinValue and durationMin starts at Long.MaxValue, so the first + * record that is added determines their value. + * This is an optimization decision to avoid using Scala builtin collections on every field in the + * taskModel. + */ +class TaskMetricsAccumRec { + var numTasks: Int = 0 + var diskBytesSpilledSum: Long = 0 + var durationSum: Long = 0 + var durationMax: Long = Long.MinValue + var durationMin: Long = Long.MaxValue + var durationAvg: Double = 0.0 + var executorCPUTimeSum: Long = 0 + var executorDeserializeCpuTimeSum: Long = 0 + var executorDeserializeTimeSum: Long = 0 + var executorRunTimeSum: Long = 0 + var inputBytesReadSum: Long = 0 + var inputRecordsReadSum: Long = 0 + var jvmGCTimeSum: Long = 0 + var memoryBytesSpilledSum: Long = 0 + var outputBytesWrittenSum: Long = 0 + var outputRecordsWrittenSum: Long = 0 + var peakExecutionMemoryMax: Long = Long.MinValue + var resultSerializationTimeSum: Long = 0 + var resultSizeMax: Long = Long.MinValue + var srFetchWaitTimeSum: Long = 0 + var srLocalBlocksFetchedSum: Long = 0 + var srLocalBytesReadSum: Long = 0 + var srRemoteBlocksFetchSum: Long = 0 + var srRemoteBytesReadSum: Long = 0 + var srRemoteBytesReadToDiskSum: Long = 0 + var srTotalBytesReadSum: Long = 0 + var swBytesWrittenSum: Long = 0 + var swRecordsWrittenSum: Long = 0 + var swWriteTimeSum: Long = 0 + + /** + * True when no task has been accounted for yet (numTasks == 0). + * Assumption that 0-tasks implies no aggregations on metrics. This means that metrics on + * job/SQL levels won't be accumulated as long as no tasks are accounted for. + */ + def isEmptyAggregates: Boolean = numTasks == 0 + + /** + * Resets the max/min fields (durationMax, durationMin, peakExecutionMemoryMax and + * resultSizeMax) to 0. The sum fields, numTasks and durationAvg are left untouched. + * This is used when the Task iterator is empty: in that case fields such as "max" still hold + * their Long.MinValue/Long.MaxValue initial values and should be reset to 0.
+ */ + def resetFields(): Unit = { + durationMax = 0 + durationMin = 0 + peakExecutionMemoryMax = 0 + resultSizeMax = 0 + } + + /** Folds a single task into this accumulator: numTasks is incremented by 1, every task metric is added to its matching Sum field, and the max/min fields are updated with math.max/math.min. */ def addRecord(rec: TaskModel): Unit = { + numTasks += 1 + // SumFields + diskBytesSpilledSum += rec.diskBytesSpilled + durationSum += rec.duration + executorCPUTimeSum += rec.executorCPUTime + executorDeserializeCpuTimeSum += rec.executorDeserializeCPUTime + executorDeserializeTimeSum += rec.executorDeserializeTime + executorRunTimeSum += rec.executorRunTime + inputBytesReadSum += rec.input_bytesRead + inputRecordsReadSum += rec.input_recordsRead + jvmGCTimeSum += rec.jvmGCTime + memoryBytesSpilledSum += rec.memoryBytesSpilled + outputBytesWrittenSum += rec.output_bytesWritten + outputRecordsWrittenSum += rec.output_recordsWritten + resultSerializationTimeSum += rec.resultSerializationTime + srFetchWaitTimeSum += rec.sr_fetchWaitTime + srLocalBlocksFetchedSum += rec.sr_localBlocksFetched + srLocalBytesReadSum += rec.sr_localBytesRead + srRemoteBlocksFetchSum += rec.sr_remoteBlocksFetched + srRemoteBytesReadSum += rec.sr_remoteBytesRead + srRemoteBytesReadToDiskSum += rec.sr_remoteBytesReadToDisk + srTotalBytesReadSum += rec.sr_totalBytesRead + swBytesWrittenSum += rec.sw_bytesWritten + swRecordsWrittenSum += rec.sw_recordsWritten + swWriteTimeSum += rec.sw_writeTime + // Max fields + durationMax = math.max(durationMax, rec.duration) + peakExecutionMemoryMax = math.max(peakExecutionMemoryMax, rec.peakExecutionMemory) + resultSizeMax = math.max(resultSizeMax, rec.resultSize) + // Min Fields + durationMin = math.min(durationMin, rec.duration) + } + + /** Merges an already aggregated stage-level result into this accumulator: rec.numTasks and each of rec's sums are added to the matching fields here, and rec's max/min values are merged with math.max/math.min. NOTE(review): the local bytes read are taken from rec.srcLocalBytesReadSum (spelled "src"); presumably that is the declared field name - confirm against StageAggTaskMetricsProfileResult. */ def addRecord(rec: StageAggTaskMetricsProfileResult): Unit = { + // Sums + numTasks += rec.numTasks + durationSum += rec.durationSum + diskBytesSpilledSum += rec.diskBytesSpilledSum + executorCPUTimeSum += rec.executorCPUTimeSum + executorRunTimeSum += rec.executorRunTimeSum + inputBytesReadSum += rec.inputBytesReadSum + executorDeserializeCpuTimeSum += rec.executorDeserializeCpuTimeSum + 
executorDeserializeTimeSum += rec.executorDeserializeTimeSum + inputRecordsReadSum += rec.inputRecordsReadSum + jvmGCTimeSum += rec.jvmGCTimeSum + memoryBytesSpilledSum += rec.memoryBytesSpilledSum + outputBytesWrittenSum += rec.outputBytesWrittenSum + outputRecordsWrittenSum += rec.outputRecordsWrittenSum + resultSerializationTimeSum += rec.resultSerializationTimeSum + srFetchWaitTimeSum += rec.srFetchWaitTimeSum + srLocalBlocksFetchedSum += rec.srLocalBlocksFetchedSum + srLocalBytesReadSum += rec.srcLocalBytesReadSum + srRemoteBlocksFetchSum += rec.srRemoteBlocksFetchSum + srRemoteBytesReadSum += rec.srRemoteBytesReadSum + srRemoteBytesReadToDiskSum += rec.srRemoteBytesReadToDiskSum + srTotalBytesReadSum += rec.srTotalBytesReadSum + swBytesWrittenSum += rec.swBytesWrittenSum + swRecordsWrittenSum += rec.swRecordsWrittenSum + swWriteTimeSum += rec.swWriteTimeSum + // Max + durationMax = math.max(durationMax, rec.durationMax) + peakExecutionMemoryMax = math.max(peakExecutionMemoryMax, rec.peakExecutionMemoryMax) + resultSizeMax = math.max(resultSizeMax, rec.resultSizeMax) + // Min + durationMin = math.min(durationMin, rec.durationMin) + } + + /** + * This method should be called to finalize the accumulations of all the metrics, i.e. after + * the last record has been added and before the results are consumed. + * It computes durationAvg from durationSum and numTasks through ToolUtils.calculateAverage + * and, when numTasks is less than 1, calls resetFields() so that the max/min fields read 0 + * instead of their initial values. + */ + def finalizeAggregation(): Unit = { + durationAvg = ToolUtils.calculateAverage(durationSum, numTasks, 1) + if (numTasks < 1) { + // number of tasks is 0, then we should reset fields such as (max, min) to 0. 
+ resetFields() + } + } +} diff --git a/core/src/main/scala/org/apache/spark/sql/rapids/tool/store/AccumMetaRef.scala b/core/src/main/scala/org/apache/spark/sql/rapids/tool/store/AccumMetaRef.scala index 7b70bedb2..35c9c19e1 100644 --- a/core/src/main/scala/org/apache/spark/sql/rapids/tool/store/AccumMetaRef.scala +++ b/core/src/main/scala/org/apache/spark/sql/rapids/tool/store/AccumMetaRef.scala @@ -27,6 +27,7 @@ case class AccumMetaRef(id: Long, name: AccumNameRef) { } object AccumMetaRef { + val EMPTY_ACCUM_META_REF: AccumMetaRef = new AccumMetaRef(0L, AccumNameRef.EMPTY_ACC_NAME_REF) def apply(id: Long, name: Option[String]): AccumMetaRef = new AccumMetaRef(id, AccumNameRef.getOrCreateAccumNameRef(name)) } diff --git a/core/src/main/scala/org/apache/spark/sql/rapids/tool/store/AccumNameRef.scala b/core/src/main/scala/org/apache/spark/sql/rapids/tool/store/AccumNameRef.scala index 0172f5229..4ce41e4a5 100644 --- a/core/src/main/scala/org/apache/spark/sql/rapids/tool/store/AccumNameRef.scala +++ b/core/src/main/scala/org/apache/spark/sql/rapids/tool/store/AccumNameRef.scala @@ -42,7 +42,7 @@ case class AccumNameRef(value: String) { object AccumNameRef { // Dummy AccNameRef to represent None accumulator names. This is an optimization to avoid // storing an option[string] for all accumulable names which leads to "get-or-else" everywhere. - private val EMPTY_ACC_NAME_REF: AccumNameRef = new AccumNameRef("N/A") + val EMPTY_ACC_NAME_REF: AccumNameRef = new AccumNameRef("N/A") // A global table to store reference to all accumulator names. The map is accessible by all // threads (different applications) running in parallel. This avoids duplicate work across // different threads.