Skip to content

Commit

Permalink
Adding comments
Browse files Browse the repository at this point in the history
Signed-off-by: Sayed Bilal Bari <[email protected]>
  • Loading branch information
bilalbari committed Aug 2, 2024
1 parent d157a41 commit 5339dd9
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@

package org.apache.spark.sql.rapids.tool.store

/**
 * Accumulator Meta Reference.
 * Immutable holder for the metadata associated with an accumulable:
 * its numeric id and a reference to its name.
 * @param id the accumulable id
 * @param name reference to the accumulator name (see [[AccNameRef]])
 */
case class AccMetaRef(id: Long, name: AccNameRef)

object AccMetaRef {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ import java.util.concurrent.ConcurrentHashMap

import org.apache.spark.sql.rapids.tool.util.EventUtils.normalizeMetricName

/**
 * Accumulator Name Reference.
 * Immutable wrapper around an accumulator name so the same name can be
 * referenced via a single object rather than duplicated strings.
 * NOTE(review): the companion object (not fully shown here) presumably
 * normalizes and caches instances — confirm against its implementation.
 * @param value the accumulator name
 */
case class AccNameRef(value: String)

object AccNameRef {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,14 @@ import com.nvidia.spark.rapids.tool.analysis.StatisticsMetrics
import org.apache.spark.scheduler.AccumulableInfo
import org.apache.spark.sql.rapids.tool.util.EventUtils.parseAccumFieldToLong

/**
* Maintains the accumulator information for a single accumulator
 * This maintains the following information:
* 1. Task updates for the accumulator - a map of all taskIds and their update values
* 2. Stage values for the accumulator - a map of all stageIds and their total values
 * 3. AccMetaRef for the accumulator - a reference to the meta information
 * @param infoRef - AccMetaRef for the accumulator
*/
class AccumInfo(val infoRef: AccMetaRef) {
// TODO: Should we use sorted maps for stageIDs and taskIds?
val taskUpdatesMap: mutable.HashMap[Long, Long] =
Expand Down Expand Up @@ -51,8 +59,6 @@ class AccumInfo(val infoRef: AccMetaRef) {
parsedUpdateValue match {
case Some(v) =>
taskUpdatesMap.put(taskId, v + existingUpdateValue)
        // update the stage if the task's update is non-zero
updateStageFlag ||= v != 0
case None =>
taskUpdatesMap.put(taskId, existingUpdateValue)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,16 @@ import com.nvidia.spark.rapids.tool.analysis.StatisticsMetrics

import org.apache.spark.scheduler.AccumulableInfo

/**
* A class that manages all accumulables -
* maintains a map of accumulable id to AccumInfo
*/
class AccumManager {
val accumInfoMap: mutable.HashMap[Long, AccumInfo] = {
new mutable.HashMap[Long, AccumInfo]()
}

def getOrCreateAccumInfo(id: Long, name: Option[String]): AccumInfo = {
private def getOrCreateAccumInfo(id: Long, name: Option[String]): AccumInfo = {
accumInfoMap.getOrElseUpdate(id, new AccumInfo(AccMetaRef(id, name)))
}

Expand All @@ -52,7 +56,6 @@ class AccumManager {
}.toMap
}


def removeAccumInfo(id: Long): Option[AccumInfo] = {
accumInfoMap.remove(id)
}
Expand Down

0 comments on commit 5339dd9

Please sign in to comment.