-
Notifications
You must be signed in to change notification settings - Fork 39
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Generate an output file with runtime and build information
Signed-off-by: Ahmed Hussein (amahussein) <[email protected]> Fixes #699 This PR dumps the runtime/build information of the tools jar to the output folder. By knowing which Tools version and which Spark version were used at runtime, developers can gain more insight into the output and the numbers generated by the Tools. For Qualification: - `rapids_4_spark_qualification_output/runtime.properties` For Profiling: - `rapids_4_spark_profile/runtime.properties` A sample of the generated file is as follows: Notice that - `runtime.spark.version` is the Spark version loaded during runtime, while `build.spark.version` is the version used to build the tools jar. - `build.version` represents the Tools jar version ``` build.hadoop.version=3.3.6 build.java.version=1.8.0_322 build.scala.version=2.12.15 build.spark.version=3.1.1 build.version=23.10.2-SNAPSHOT runtime.spark.version=3.3.3 ```
- Loading branch information
1 parent
03effc2
commit 90f2997
Showing
8 changed files
with
205 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
|
||
################################## | ||
# Build Properties | ||
################################## | ||
|
||
build.version=${project.version} | ||
build.spark.version=${spark.version} | ||
build.hadoop.version=${hadoop.version} | ||
build.java.version=${java.version} | ||
build.scala.version=${scala.version} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
64 changes: 64 additions & 0 deletions
64
core/src/main/scala/org/apache/spark/sql/rapids/tool/util/RuntimeReportGenerator.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
/* | ||
* Copyright (c) 2024, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql.rapids.tool.util | ||
|
||
import java.io.{PrintWriter, StringWriter} | ||
|
||
import com.nvidia.spark.rapids.tool.ToolTextFileWriter | ||
import org.apache.hadoop.conf.Configuration | ||
|
||
import org.apache.spark.internal.Logging | ||
import org.apache.spark.sql.rapids.tool.ToolUtils | ||
|
||
|
||
/**
 * Mixin exposing a single entry point that writes the build/runtime report
 * into the implementor's output directory.
 */
trait RuntimeReporter extends Logging {
  /** Directory handed to the generator as the destination of the report. */
  val outputDir: String

  /**
   * Delegates to `RuntimeReportGenerator.generateReport` using this instance's `outputDir`.
   *
   * @param hadoopConf optional Hadoop configuration, forwarded unchanged to the generator.
   */
  def generateRuntimeReport(hadoopConf: Option[Configuration] = None): Unit =
    RuntimeReportGenerator.generateReport(outputDir, hadoopConf)
}
|
||
/**
 * Produces a properties file describing the loaded build, augmented with
 * properties gathered at runtime (currently the Spark runtime version).
 * The set of properties is expected to grow whenever a new entry helps
 * understanding and investigating the tools output.
 */
object RuntimeReportGenerator extends Logging {
  private val ReportLabel = "RAPIDS Accelerator for Apache Spark's Build/Runtime Information"
  private val ReportFileName = "runtime.properties"

  /**
   * Writes the build/runtime properties to `runtime.properties` under `outputDir`
   * and then echoes the same properties to the log at INFO level.
   *
   * @param outputDir the directory where the report is generated.
   * @param hadoopConf the hadoop configuration object used to access the HDFS if any.
   */
  def generateReport(outputDir: String, hadoopConf: Option[Configuration] = None): Unit = {
    val props = RapidsToolsConfUtil.loadBuildProperties
    // The Spark version seen at runtime may differ from the one the jar was built
    // against, so it is recorded under its own key.
    props.setProperty("runtime.spark.version", ToolUtils.sparkRuntimeVersion)

    val fileWriter = new ToolTextFileWriter(outputDir, ReportFileName, ReportLabel, hadoopConf)
    try {
      fileWriter.writeProperties(props, ReportLabel)
    } finally {
      // Always release the writer, even when writing the properties fails.
      fileWriter.close()
    }

    // Mirror the properties into the log.
    val rendered = new StringWriter
    props.list(new PrintWriter(rendered))
    logInfo(s"\n$ReportLabel\n${rendered.toString}")
  }
}
|
||
|
62 changes: 62 additions & 0 deletions
62
core/src/main/scala/org/apache/spark/sql/rapids/tool/util/SortedJProperties.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
/* | ||
* Copyright (c) 2024, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql.rapids.tool.util | ||
|
||
import java.io.{IOException, OutputStream} | ||
import java.util.{Collections, Comparator, Enumeration, Map, Properties, Set, TreeSet} | ||
|
||
|
||
|
||
/**
 * A Java `Properties` variant whose `store` emits the entries sorted by key.
 * Loading the properties into a hash map and sorting that instead would bypass
 * the serialization rules and comment handling that `Properties.store` provides,
 * so the sorting is done by delegating to a `Properties` copy with ordered views.
 * This implementation works for Java8+.
 * See the following answer on StackOverflow:
 * https://stackoverflow.com/questions/10275862/how-to-sort-properties-in-java/55957344#55957344
 */
class SortedJProperties extends Properties {
  /**
   * Copies all entries into a temporary `Properties` whose `entrySet`, `keySet`
   * and `keys` views are sorted, then lets that copy perform the actual store.
   *
   * @param out the stream the properties are written to.
   * @param comments the header comment passed through to `Properties.store`.
   */
  @throws[IOException]
  override def store(out: OutputStream, comments: String): Unit = {
    val orderedCopy: Properties = new Properties() {
      override def entrySet: Set[Map.Entry[AnyRef, AnyRef]] = {
        /*
         * An explicit comparator is required to avoid the following exception on jdk >=9:
         * java.lang.ClassCastException: java.base/java.util.concurrent.ConcurrentHashMap$MapEntry
         * cannot be cast to java.base/java.lang.Comparable
         */
        val byKeyText: Comparator[Map.Entry[AnyRef, AnyRef]] =
          new Comparator[Map.Entry[AnyRef, AnyRef]]() {
            override def compare(left: Map.Entry[AnyRef, AnyRef],
                right: Map.Entry[AnyRef, AnyRef]): Int = {
              left.getKey.toString.compareTo(right.getKey.toString)
            }
          }
        val ordered: Set[Map.Entry[AnyRef, AnyRef]] =
          new TreeSet[Map.Entry[AnyRef, AnyRef]](byKeyText)
        ordered.addAll(super.entrySet)
        ordered
      }

      // Keys sorted by their natural ordering.
      override def keySet: Set[AnyRef] = {
        val unordered = super.keySet
        new TreeSet[AnyRef](unordered)
      }

      // Same sorted key view as keySet, exposed as an Enumeration.
      override def keys: Enumeration[AnyRef] = Collections.enumeration(keySet)
    }
    orderedCopy.putAll(this)
    orderedCopy.store(out, comments)
  }
}