diff --git a/integration_tests/conftest.py b/integration_tests/conftest.py
index f2e3435d0c5..1587b6591bd 100644
--- a/integration_tests/conftest.py
+++ b/integration_tests/conftest.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -62,3 +62,6 @@ def pytest_addoption(parser):
parser.addoption(
"--pyarrow_test", action='store_true', default=False, help="if enable pyarrow tests"
)
+ parser.addoption(
+ "--default_configs_path", action="store", default=None, help="path to a JSON file that stores default configs for integration test"
+ )
diff --git a/integration_tests/pom.xml b/integration_tests/pom.xml
index 3ea20b75610..8829cd2d6ec 100644
--- a/integration_tests/pom.xml
+++ b/integration_tests/pom.xml
@@ -68,6 +68,16 @@
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_${scala.binary.version}</artifactId>
+            <exclusions>
+                <exclusion>
+                    <groupId>com.fasterxml.jackson.core</groupId>
+                    <artifactId>jackson-core</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>com.fasterxml.jackson.core</groupId>
+                    <artifactId>jackson-databind</artifactId>
+                </exclusion>
+            </exclusions>
@@ -78,6 +88,31 @@
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-antrun-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <id>populate-default-configs-for-testing</id>
+                        <phase>generate-test-resources</phase>
+                        <goals>
+                            <goal>run</goal>
+                        </goals>
+                        <configuration>
+                            <target>
+                                <!-- Ant task that dumps the default spark-rapids configs
+                                     for the integration tests (task body omitted) -->
+                            </target>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.6.0</version>
@@ -181,7 +216,7 @@
                <artifactId>exec-maven-plugin</artifactId>
                <executions>
                    <execution>
-                        <id>run pyspark tests</id>
+                        <id>run-pyspark-tests</id>
                        <phase>verify</phase>
                        <goals>
                            <goal>exec</goal>
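The antrun execution above is bound to the generate-test-resources phase, and its Ant target (body elided above) is expected to drive the config dump that this diff adds to RapidsConf further down. A minimal sketch of what such an entry point could look like; the DumpDefaultConfigs object and its argument layout are hypothetical, only RapidsConf.dumpConfigsWithDefault comes from this change:

```scala
package com.nvidia.spark.rapids

// Hypothetical build-time entry point for the antrun <java> task; only the
// call to RapidsConf.dumpConfigsWithDefault is real (added later in this diff).
object DumpDefaultConfigs {
  def main(args: Array[String]): Unit = {
    // args(0): output format, "json" or "plain"
    // args(1): output path, e.g. target/spark-rapids-default-configs.json,
    //          the default consumed by run_pyspark_from_build.sh below
    RapidsConf.dumpConfigsWithDefault(args(0), args(1))
  }
}
```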
diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh
index 22a23349791..cd6cc5cef00 100755
--- a/integration_tests/run_pyspark_from_build.sh
+++ b/integration_tests/run_pyspark_from_build.sh
@@ -223,6 +223,7 @@ else
REPORT_CHARS=${REPORT_CHARS:="fE"} # default as (f)ailed, (E)rror
STD_INPUT_PATH="$INPUT_PATH"/src/test/resources
+ DEFAULT_CONFIGS_PATH=${DEFAULT_CONFIGS_PATH:-${TARGET_DIR}/spark-rapids-default-configs.json}
TEST_COMMON_OPTS=(-v
-r"$REPORT_CHARS"
"$TEST_TAGS"
@@ -232,6 +233,7 @@ else
"$TEST_ARGS"
$RUN_TEST_PARAMS
--junitxml=TEST-pytest-`date +%s%N`.xml
+ --default_configs_path="${DEFAULT_CONFIGS_PATH}"
"$@")
NUM_LOCAL_EXECS=${NUM_LOCAL_EXECS:-0}
diff --git a/integration_tests/src/assembly/bin.xml b/integration_tests/src/assembly/bin.xml
index 6209d0b152a..29f9c15fb88 100644
--- a/integration_tests/src/assembly/bin.xml
+++ b/integration_tests/src/assembly/bin.xml
@@ -1,6 +1,6 @@
diff --git a/scala2.13/pom.xml b/scala2.13/pom.xml
index 1161334a4c0..cc33e64bf66 100644
--- a/scala2.13/pom.xml
+++ b/scala2.13/pom.xml
@@ -939,6 +939,20 @@
                <artifactId>iceberg-core</artifactId>
                <version>${iceberg.version}</version>
                <scope>provided</scope>
+                <exclusions>
+                    <exclusion>
+                        <groupId>com.fasterxml.jackson.core</groupId>
+                        <artifactId>jackson-annotations</artifactId>
+                    </exclusion>
+                    <exclusion>
+                        <groupId>com.fasterxml.jackson.core</groupId>
+                        <artifactId>jackson-core</artifactId>
+                    </exclusion>
+                    <exclusion>
+                        <groupId>com.fasterxml.jackson.core</groupId>
+                        <artifactId>jackson-databind</artifactId>
+                    </exclusion>
+                </exclusions>
            </dependency>
            <dependency>
                <groupId>org.apache.spark</groupId>
@@ -1439,7 +1453,7 @@ This will force full Scala code rebuild in downstream modules.
                    <groupId>org.codehaus.mojo</groupId>
                    <artifactId>exec-maven-plugin</artifactId>
-                    <version>3.0.0</version>
+                    <version>3.3.0</version>
                </plugin>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
index 14551471e66..494ba34237e 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
@@ -15,15 +15,18 @@
*/
package com.nvidia.spark.rapids
-import java.io.{File, FileOutputStream}
+import java.io.{BufferedOutputStream, DataOutputStream, File, FileOutputStream}
+import java.nio.charset.StandardCharsets
import java.util
-
-import scala.collection.JavaConverters._
-import scala.collection.mutable.{HashMap, ListBuffer}
+import java.util.Locale
import ai.rapids.cudf.Cuda
import com.nvidia.spark.rapids.jni.RmmSpark.OomInjectionType
import com.nvidia.spark.rapids.lore.{LoreId, OutputLoreId}
+import org.json4s.DefaultFormats
+import org.json4s.jackson.Serialization.writePretty
+import scala.collection.JavaConverters._
+import scala.collection.mutable.{HashMap, ListBuffer}
import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
@@ -124,6 +127,7 @@ abstract class ConfEntry[T](val key: String, val converter: String => T, val doc
def get(conf: Map[String, String]): T
def get(conf: SQLConf): T
+ def getDefault(): T
def help(asTable: Boolean = false): Unit
override def toString: String = key
@@ -147,6 +151,10 @@ class ConfEntryWithDefault[T](key: String, converter: String => T, doc: String,
}
}
+ override def getDefault(): T = {
+ defaultValue
+ }
+
override def help(asTable: Boolean = false): Unit = {
if (!isInternal) {
val startupOnlyStr = if (isStartupOnly) "Startup" else "Runtime"
@@ -182,6 +190,10 @@ class OptionalConfEntry[T](key: String, val rawConverter: String => T, doc: Stri
}
}
+ override def getDefault(): Option[T] = {
+ None
+ }
+
override def help(asTable: Boolean = false): Unit = {
if (!isInternal) {
val startupOnlyStr = if (isStartupOnly) "Startup" else "Runtime"
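Together with the abstract declaration above, these overrides give every config entry a uniform way to report its compile-time default: entries declared with a default return it, optional entries return None. A simplified, self-contained sketch of that contract; the classes and keys below are stand-ins for illustration, not the plugin's actual ConfEntry hierarchy:

```scala
// Simplified stand-ins mirroring the getDefault() contract added above.
abstract class Entry[T](val key: String) {
  def getDefault(): T
}

class EntryWithDefault[T](key: String, defaultValue: T) extends Entry[T](key) {
  override def getDefault(): T = defaultValue
}

class OptionalEntry[T](key: String) extends Entry[Option[T]](key) {
  // Optional entries carry no default, so the reported default is None.
  override def getDefault(): Option[T] = None
}

object GetDefaultDemo {
  def main(args: Array[String]): Unit = {
    // Made-up keys, used only to show the two behaviors.
    val enabled = new EntryWithDefault[Boolean]("spark.rapids.example.enabled", true)
    val dumpDir = new OptionalEntry[String]("spark.rapids.example.dumpDir")
    println(enabled.getDefault()) // true
    println(dumpDir.getDefault()) // None
  }
}
```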
@@ -2374,6 +2386,17 @@ val SHUFFLE_COMPRESSION_LZ4_CHUNK_SIZE = conf("spark.rapids.shuffle.compression.
println("-----|-----------------|-------------|---------------|------")
}
+ /**
+ * Returns all spark-rapids configs with their default values.
+ * This is used when dumping the default configs so that they can
+ * be consumed by the integration tests.
+ */
+ def getAllConfigsWithDefault: Map[String, Any] = {
+ val allConfs = registeredConfs.clone()
+ allConfs.append(RapidsPrivateUtil.getPrivateConfigs(): _*)
+ allConfs.map(e => e.key -> e.getDefault).toMap
+ }
+
def help(asTable: Boolean = false): Unit = {
helpCommon(asTable)
helpAdvanced(asTable)
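A short usage sketch of the new helper; the wrapper object is illustrative and the config key it looks up is assumed to exist with a boolean default:

```scala
object DefaultsInspection {
  def main(args: Array[String]): Unit = {
    // Combined public + private defaults as a plain Scala map.
    val defaults: Map[String, Any] =
      com.nvidia.spark.rapids.RapidsConf.getAllConfigsWithDefault

    // Entries declared with a default map to that value (key assumed to exist);
    // optional entries with no default map to None, which is why the writers in
    // the next hunk special-case Option values.
    println(defaults.getOrElse("spark.rapids.sql.enabled", "<missing>"))
    println(defaults.values.count(_ == None))
  }
}
```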
@@ -2523,6 +2546,49 @@ val SHUFFLE_COMPRESSION_LZ4_CHUNK_SIZE = conf("spark.rapids.shuffle.compression.
}
}
}
+
+ object Format extends Enumeration {
+ type Format = Value
+ val PLAIN, JSON = Value
+ }
+
+ def dumpConfigsWithDefault(formatName: String, outputPath: String): Unit = {
+ import com.nvidia.spark.rapids.Arm._
+
+ val format = Format.withName(formatName.toUpperCase(Locale.US))
+
+ println(s"Dumping all spark-rapids configs and their defaults at ${outputPath}")
+
+ val allConfs = getAllConfigsWithDefault
+ withResource(new FileOutputStream(outputPath)) { fos =>
+ withResource(new BufferedOutputStream(fos)) { bos =>
+ format match {
+ case Format.PLAIN =>
+ withResource(new DataOutputStream(bos)) { dos =>
+ allConfs.foreach( { case (k, v) =>
+ val valStr = v match {
+ case Some(optVal) => optVal.toString
+ case None => ""
+ case _ =>
+ if (v == null) {
+ ""
+ } else {
+ v.toString
+ }
+ }
+ dos.writeUTF(s"'${k}': '${valStr}',")
+ })
+ }
+ case Format.JSON =>
+ implicit val formats: DefaultFormats.type = DefaultFormats
+ bos.write(writePretty(allConfs)
+ .getBytes(StandardCharsets.UTF_8))
+ case _ =>
+ System.err.println(s"Unknown format: ${format}")
+ }
+ }
+ }
+ }
}
class RapidsConf(conf: Map[String, String]) extends Logging {
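Finally, a hedged usage sketch of dumpConfigsWithDefault itself; the wrapper object and output paths are illustrative, and the format name is matched case-insensitively because it is upper-cased before Format.withName:

```scala
object DumpDemo {
  def main(args: Array[String]): Unit = {
    // JSON: a pretty-printed object mapping every config key to its default;
    // this is the file the integration tests read via --default_configs_path.
    com.nvidia.spark.rapids.RapidsConf.dumpConfigsWithDefault(
      "json", "/tmp/spark-rapids-default-configs.json")

    // PLAIN: each "'key': 'value'," record is written with writeUTF, i.e. as a
    // length-prefixed modified-UTF-8 string rather than a newline-terminated line.
    com.nvidia.spark.rapids.RapidsConf.dumpConfigsWithDefault(
      "plain", "/tmp/spark-rapids-default-configs.txt")
  }
}
```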