diff --git a/integration_tests/conftest.py b/integration_tests/conftest.py
index f2e3435d0c5..1587b6591bd 100644
--- a/integration_tests/conftest.py
+++ b/integration_tests/conftest.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -62,3 +62,6 @@ def pytest_addoption(parser):
     parser.addoption(
         "--pyarrow_test", action='store_true', default=False, help="if enable pyarrow tests"
     )
+    parser.addoption(
+        "--default_configs_path", action="store", default=None, help="path to a JSON file that stores default configs for integration test"
+    )
diff --git a/integration_tests/pom.xml b/integration_tests/pom.xml
index 3ea20b75610..8829cd2d6ec 100644
--- a/integration_tests/pom.xml
+++ b/integration_tests/pom.xml
@@ -68,6 +68,16 @@
     org.apache.spark
     spark-hive_${scala.binary.version}
+    com.fasterxml.jackson.core
+    jackson-core
+    com.fasterxml.jackson.core
+    jackson-databind
@@ -78,6 +88,31 @@
+    org.apache.maven.plugins
+    maven-antrun-plugin
+    populate-default-configs-for-testing
+    generate-test-resources
+    run
     maven-assembly-plugin
     3.6.0
@@ -181,7 +216,7 @@
     exec-maven-plugin
-    run pyspark tests
+    run-pyspark-tests
     verify
     exec
diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh
index 22a23349791..cd6cc5cef00 100755
--- a/integration_tests/run_pyspark_from_build.sh
+++ b/integration_tests/run_pyspark_from_build.sh
@@ -223,6 +223,7 @@ else
     REPORT_CHARS=${REPORT_CHARS:="fE"} # default as (f)ailed, (E)rror
     STD_INPUT_PATH="$INPUT_PATH"/src/test/resources
+    DEFAULT_CONFIGS_PATH=${DEFAULT_CONFIGS_PATH:-${TARGET_DIR}/spark-rapids-default-configs.json}
     TEST_COMMON_OPTS=(-v
           -r"$REPORT_CHARS"
           "$TEST_TAGS"
@@ -232,6 +233,7 @@ else
           "$TEST_ARGS"
           $RUN_TEST_PARAMS
           --junitxml=TEST-pytest-`date +%s%N`.xml
+          --default_configs_path="${DEFAULT_CONFIGS_PATH}"
           "$@")
     NUM_LOCAL_EXECS=${NUM_LOCAL_EXECS:-0}
diff --git a/integration_tests/src/assembly/bin.xml b/integration_tests/src/assembly/bin.xml
index 6209d0b152a..29f9c15fb88 100644
--- a/integration_tests/src/assembly/bin.xml
+++ b/integration_tests/src/assembly/bin.xml
@@ -1,6 +1,6 @@
     org.apache.spark
     spark-hive_${scala.binary.version}
+    com.fasterxml.jackson.core
+    jackson-core
+    com.fasterxml.jackson.core
+    jackson-databind
@@ -78,6 +88,31 @@
+    org.apache.maven.plugins
+    maven-antrun-plugin
+    populate-default-configs-for-testing
+    generate-test-resources
+    run
     maven-assembly-plugin
     3.6.0
@@ -181,7 +216,7 @@
     exec-maven-plugin
-    run pyspark tests
+    run-pyspark-tests
     verify
     exec
diff --git a/scala2.13/pom.xml b/scala2.13/pom.xml
index 1161334a4c0..cc33e64bf66 100644
--- a/scala2.13/pom.xml
+++ b/scala2.13/pom.xml
@@ -939,6 +939,20 @@
     iceberg-core
     ${iceberg.version}
     provided
+    com.fasterxml.jackson.core
+    jackson-annotations
+    com.fasterxml.jackson.core
+    jackson-core
+    com.fasterxml.jackson.core
+    jackson-databind
     org.apache.spark
@@ -1439,7 +1453,7 @@ This will force full Scala code rebuild in downstream modules.
     org.codehaus.mojo
     exec-maven-plugin
-    3.0.0
+    3.3.0
     org.apache.maven.plugins
diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
index 14551471e66..494ba34237e 100644
--- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
+++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala
@@ -15,15 +15,18 @@
  */
 package com.nvidia.spark.rapids
 
-import java.io.{File, FileOutputStream}
+import java.io.{BufferedOutputStream, DataOutputStream, File, FileOutputStream}
+import java.nio.charset.StandardCharsets
 import java.util
-
-import scala.collection.JavaConverters._
-import scala.collection.mutable.{HashMap, ListBuffer}
+import java.util.Locale
 
 import ai.rapids.cudf.Cuda
 import com.nvidia.spark.rapids.jni.RmmSpark.OomInjectionType
 import com.nvidia.spark.rapids.lore.{LoreId, OutputLoreId}
+import org.json4s.DefaultFormats
+import org.json4s.jackson.Serialization.writePretty
+import scala.collection.JavaConverters._
+import scala.collection.mutable.{HashMap, ListBuffer}
 
 import org.apache.spark.SparkConf
 import org.apache.spark.internal.Logging
@@ -124,6 +127,7 @@ abstract class ConfEntry[T](val key: String, val converter: String => T, val doc
 
   def get(conf: Map[String, String]): T
   def get(conf: SQLConf): T
+  def getDefault(): T
   def help(asTable: Boolean = false): Unit
 
   override def toString: String = key
@@ -147,6 +151,10 @@ class ConfEntryWithDefault[T](key: String, converter: String => T, doc: String,
     }
   }
 
+  override def getDefault(): T = {
+    defaultValue
+  }
+
   override def help(asTable: Boolean = false): Unit = {
     if (!isInternal) {
       val startupOnlyStr = if (isStartupOnly) "Startup" else "Runtime"
@@ -182,6 +190,10 @@ class OptionalConfEntry[T](key: String, val rawConverter: String => T, doc: Stri
     }
   }
 
+  override def getDefault(): Option[T] = {
+    None
+  }
+
   override def help(asTable: Boolean = false): Unit = {
     if (!isInternal) {
       val startupOnlyStr = if (isStartupOnly) "Startup" else "Runtime"
@@ -2374,6 +2386,17 @@ val SHUFFLE_COMPRESSION_LZ4_CHUNK_SIZE = conf("spark.rapids.shuffle.compression.
     println("-----|-----------------|-------------|---------------|------")
   }
 
+  /**
+   * Returns all spark-rapids configs with their default values.
+   * This function is used to dump default configs, so that they
+   * could be used by the integration test.
+   */
+  def getAllConfigsWithDefault: Map[String, Any] = {
+    val allConfs = registeredConfs.clone()
+    allConfs.append(RapidsPrivateUtil.getPrivateConfigs(): _*)
+    allConfs.map(e => e.key -> e.getDefault).toMap
+  }
+
   def help(asTable: Boolean = false): Unit = {
     helpCommon(asTable)
     helpAdvanced(asTable)
@@ -2523,6 +2546,49 @@ val SHUFFLE_COMPRESSION_LZ4_CHUNK_SIZE = conf("spark.rapids.shuffle.compression.
       }
     }
   }
+
+  object Format extends Enumeration {
+    type Format = Value
+    val PLAIN, JSON = Value
+  }
+
+  def dumpConfigsWithDefault(formatName: String, outputPath: String): Unit = {
+    import com.nvidia.spark.rapids.Arm._
+
+    val format = Format.withName(formatName.toUpperCase(Locale.US))
+
+    println(s"Dumping all spark-rapids configs and their defaults at ${outputPath}")
+
+    val allConfs = getAllConfigsWithDefault
+    withResource(new FileOutputStream(outputPath)) { fos =>
+      withResource(new BufferedOutputStream(fos)) { bos =>
+        format match {
+          case Format.PLAIN =>
+            withResource(new DataOutputStream(bos)) { dos =>
+              allConfs.foreach( { case (k, v) =>
+                val valStr = v match {
+                  case Some(optVal) => optVal.toString
+                  case None => ""
+                  case _ =>
+                    if (v == null) {
+                      ""
+                    } else {
+                      v.toString
+                    }
+                }
+                dos.writeUTF(s"'${k}': '${valStr}',")
+              })
+            }
+          case Format.JSON =>
+            implicit val formats: DefaultFormats.type = DefaultFormats
+            bos.write(writePretty(allConfs)
+              .getBytes(StandardCharsets.UTF_8))
+          case _ =>
+            System.err.println(s"Unknown format: ${format}")
+        }
+      }
+    }
+  }
 }
 
 class RapidsConf(conf: Map[String, String]) extends Logging {
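
Taken together, these changes let the build dump every spark-rapids config with its default value so the integration tests can pick the file up through the new `--default_configs_path` pytest option (which `run_pyspark_from_build.sh` points at `${TARGET_DIR}/spark-rapids-default-configs.json` by default). The `populate-default-configs-for-testing` antrun execution presumably drives the new `RapidsConf.dumpConfigsWithDefault` entry point during `generate-test-resources`; below is a minimal sketch of such a driver, where the object name and argument layout are assumptions rather than part of this patch.

```scala
// Hypothetical driver, not part of the patch: writes the defaults file in the
// JSON format that --default_configs_path expects. RapidsConf.dumpConfigsWithDefault
// and the "JSON"/"PLAIN" format names come from the RapidsConf.scala changes above.
import com.nvidia.spark.rapids.RapidsConf

object DumpDefaultConfigs {
  def main(args: Array[String]): Unit = {
    // e.g. args = Array("JSON", "integration_tests/target/spark-rapids-default-configs.json")
    require(args.length == 2, "usage: DumpDefaultConfigs <PLAIN|JSON> <output path>")
    RapidsConf.dumpConfigsWithDefault(args(0), args(1))
  }
}
```

On the test side, conftest.py can then read the JSON file named by `--default_configs_path` to learn each default without hard-coding it.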