From e3d6bff75fe720890cd8b467d5ae6e97fab7a7b2 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Mon, 29 Jul 2024 14:57:52 -0700 Subject: [PATCH 01/40] Add a new interface to retrieve all configs with their defaults; Add a new stage for integration test to populate default configs Signed-off-by: Jihoon Son --- integration_tests/pom.xml | 30 +++++++ integration_tests/run_pyspark_from_build.sh | 1 + .../src/main/python/spark_session.py | 26 ++---- .../rapids/tests/DumpDefaultConfigs.scala | 83 +++++++++++++++++++ pom.xml | 2 +- scala2.13/integration_tests/pom.xml | 30 +++++++ .../com/nvidia/spark/rapids/RapidsConf.scala | 20 +++++ 7 files changed, 173 insertions(+), 19 deletions(-) create mode 100644 integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala diff --git a/integration_tests/pom.xml b/integration_tests/pom.xml index 75bd244f31d..aabeb45187f 100644 --- a/integration_tests/pom.xml +++ b/integration_tests/pom.xml @@ -180,6 +180,36 @@ org.codehaus.mojo exec-maven-plugin + + clean previously populated config file + verify + + exec + + + rm + + -f + ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json + + + + + populate default configs for testing + verify + + java + + + true + com.nvidia.spark.rapids.tests.DumpDefaultConfigs + provided + + json + ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json + + + run pyspark tests verify diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh index 22a23349791..b3f223d7a46 100755 --- a/integration_tests/run_pyspark_from_build.sh +++ b/integration_tests/run_pyspark_from_build.sh @@ -15,6 +15,7 @@ set -ex SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +export SPARK_RAPIDS_DEFAULT_CONFIGS_PATH=${SCRIPTPATH}/target/spark-rapids-default-configs.json cd "$SCRIPTPATH" if [[ $( echo ${SKIP_TESTS} | tr [:upper:] [:lower:] ) == "true" ]]; diff --git a/integration_tests/src/main/python/spark_session.py b/integration_tests/src/main/python/spark_session.py index 26388617fff..8356b49f48c 100644 --- a/integration_tests/src/main/python/spark_session.py +++ b/integration_tests/src/main/python/spark_session.py @@ -21,6 +21,7 @@ from pyspark.sql.types import TimestampType, DateType, _acceptable_types from spark_init_internal import get_spark_i_know_what_i_am_doing, spark_version from unittest.mock import patch +import json def _from_scala_map(scala_map): ret = {} @@ -40,23 +41,9 @@ def _from_scala_map(scala_map): # These settings can be overridden by specific tests if necessary. # Many of these are redundant with default settings for the configs but are set here explicitly # to ensure any cluster settings do not interfere with tests that assume the defaults. 
-_default_conf = { - 'spark.rapids.sql.castDecimalToFloat.enabled': 'false', - 'spark.rapids.sql.castFloatToDecimal.enabled': 'false', - 'spark.rapids.sql.castFloatToIntegralTypes.enabled': 'false', - 'spark.rapids.sql.castFloatToString.enabled': 'false', - 'spark.rapids.sql.castStringToFloat.enabled': 'false', - 'spark.rapids.sql.castStringToTimestamp.enabled': 'false', - 'spark.rapids.sql.fast.sample': 'false', - 'spark.rapids.sql.hasExtendedYearValues': 'true', - 'spark.rapids.sql.hashOptimizeSort.enabled': 'false', - 'spark.rapids.sql.improvedFloatOps.enabled': 'false', - 'spark.rapids.sql.incompatibleDateFormats.enabled': 'false', - 'spark.rapids.sql.incompatibleOps.enabled': 'false', - 'spark.rapids.sql.mode': 'executeongpu', - 'spark.rapids.sql.variableFloatAgg.enabled': 'false', - 'spark.sql.legacy.allowNegativeScaleOfDecimal': 'true', -} +conf_file_path = os.environ['SPARK_RAPIDS_DEFAULT_CONFIGS_PATH'] +with open(conf_file_path) as conf_file: + _default_conf = json.load(conf_file) def _set_all_confs(conf): newconf = _default_conf.copy() @@ -67,7 +54,10 @@ def _set_all_confs(conf): newconf.update(conf) for key, value in newconf.items(): if _spark.conf.get(key, None) != value: - _spark.conf.set(key, value) + if isinstance(value, str): + _spark.conf.set(key, value) + else: + _spark.conf.set(key, str(value)) def reset_spark_session_conf(): """Reset all of the configs for a given spark session.""" diff --git a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala new file mode 100644 index 00000000000..57f77247216 --- /dev/null +++ b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.tests + +import java.io.{BufferedOutputStream, DataOutputStream, FileOutputStream} +import java.util.Locale + +import com.nvidia.spark.rapids.Arm.withResource +import com.nvidia.spark.rapids.ShimReflectionUtils + +/** + * Dump all spark-rapids configs with their defaults. + */ +object DumpDefaultConfigs { + + object Format extends Enumeration { + type Format = Value + val PLAIN, JSON = Value + } + + def main(args: Array[String]): Unit = { + if (args.length < 2) { + System.err.println(s"Usage: ${this.getClass.getCanonicalName} {format} {output_path}") + System.exit(1) + } + + val format = Format.withName(args(0).toUpperCase(Locale.US)) + val outputPath = args(1) + + println(s"Dumping all spark-rapids configs and their defaults at ${outputPath}") + + // We use the reflection as RapidsConf should be accessed via the shim layer. 
+ val clazz = ShimReflectionUtils.loadClass("com.nvidia.spark.rapids.RapidsConf") + val m = clazz.getDeclaredMethod("getAllConfigsWithDefault") + val allConfs: Map[String, Any] = m.invoke(null).asInstanceOf[Map[String, Any]] + val fos: FileOutputStream = new FileOutputStream(outputPath) + withResource(fos) { _ => + val bos: BufferedOutputStream = new BufferedOutputStream(fos) + withResource(bos) { _ => + format match { + case Format.PLAIN => + val dos: DataOutputStream = new DataOutputStream(bos) + withResource(dos) { _ => + allConfs.foreach({ case (k, v) => + val valStr = v match { + case some: Some[_] => some.getOrElse("") + case _ => + if (v == null) { + "" + } else { + v.toString + } + } + dos.writeUTF(s"'${k}': '${valStr}',") + }) + } + case Format.JSON => + import org.json4s.jackson.Serialization.writePretty + import org.json4s.DefaultFormats + import java.nio.charset.StandardCharsets + implicit val formats = DefaultFormats + bos.write(writePretty(allConfs).getBytes(StandardCharsets.UTF_8)) + case _ => + System.err.println(s"Unknown format: ${format}") + } + } + } + } +} diff --git a/pom.xml b/pom.xml index 91b116c46a9..556b04e92cf 100644 --- a/pom.xml +++ b/pom.xml @@ -1439,7 +1439,7 @@ This will force full Scala code rebuild in downstream modules. org.codehaus.mojo exec-maven-plugin - 3.0.0 + 3.3.0 org.apache.maven.plugins diff --git a/scala2.13/integration_tests/pom.xml b/scala2.13/integration_tests/pom.xml index f7e443d5a22..2ee846d0e70 100644 --- a/scala2.13/integration_tests/pom.xml +++ b/scala2.13/integration_tests/pom.xml @@ -180,6 +180,36 @@ org.codehaus.mojo exec-maven-plugin + + clean previously populated config file + verify + + exec + + + rm + + -f + ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json + + + + + populate default configs for testing + verify + + java + + + true + com.nvidia.spark.rapids.tests.DumpDefaultConfigs + provided + + json + ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json + + + run pyspark tests verify diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala index 1cb03958111..5440b8dbfa7 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala @@ -124,6 +124,7 @@ abstract class ConfEntry[T](val key: String, val converter: String => T, val doc def get(conf: Map[String, String]): T def get(conf: SQLConf): T + def getDefault(): T def help(asTable: Boolean = false): Unit override def toString: String = key @@ -147,6 +148,10 @@ class ConfEntryWithDefault[T](key: String, converter: String => T, doc: String, } } + override def getDefault(): T = { + defaultValue + } + override def help(asTable: Boolean = false): Unit = { if (!isInternal) { val startupOnlyStr = if (isStartupOnly) "Startup" else "Runtime" @@ -182,6 +187,10 @@ class OptionalConfEntry[T](key: String, val rawConverter: String => T, doc: Stri } } + override def getDefault(): Option[T] = { + None + } + override def help(asTable: Boolean = false): Unit = { if (!isInternal) { val startupOnlyStr = if (isStartupOnly) "Startup" else "Runtime" @@ -2383,6 +2392,17 @@ val SHUFFLE_COMPRESSION_LZ4_CHUNK_SIZE = conf("spark.rapids.shuffle.compression. println("-----|-----------------|-------------|---------------|------") } + /** + * Returns all spark-rapids configs with their default values. 
+ * This function is used to dump default configs, so that they + * could be used by the integration test. + */ + def getAllConfigsWithDefault: Map[String, Any] = { + val allConfs = registeredConfs.clone() + allConfs.append(RapidsPrivateUtil.getPrivateConfigs(): _*) + allConfs.map(e => e.key -> e.getDefault).toMap + } + def help(asTable: Boolean = false): Unit = { helpCommon(asTable) helpAdvanced(asTable) From e7f1ec5ac429ced373a0c0fc2169062746d3ee72 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 1 Aug 2024 11:00:46 -0700 Subject: [PATCH 02/40] address comments --- integration_tests/pom.xml | 6 +++--- .../spark/rapids/tests/DumpDefaultConfigs.scala | 17 +++++++---------- scala2.13/integration_tests/pom.xml | 6 +++--- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/integration_tests/pom.xml b/integration_tests/pom.xml index aabeb45187f..d9a1b504247 100644 --- a/integration_tests/pom.xml +++ b/integration_tests/pom.xml @@ -181,7 +181,7 @@ exec-maven-plugin - clean previously populated config file + clean-previously-populated-config-file verify exec @@ -195,7 +195,7 @@ - populate default configs for testing + populate-default-configs-for-testing verify java @@ -211,7 +211,7 @@ - run pyspark tests + run-pyspark-tests verify exec diff --git a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala index 57f77247216..0b9179471a2 100644 --- a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala +++ b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala @@ -17,10 +17,13 @@ package com.nvidia.spark.rapids.tests import java.io.{BufferedOutputStream, DataOutputStream, FileOutputStream} +import java.nio.charset.StandardCharsets import java.util.Locale import com.nvidia.spark.rapids.Arm.withResource import com.nvidia.spark.rapids.ShimReflectionUtils +import org.json4s.DefaultFormats +import org.json4s.jackson.Serialization.writePretty /** * Dump all spark-rapids configs with their defaults. 
@@ -47,15 +50,12 @@ object DumpDefaultConfigs { val clazz = ShimReflectionUtils.loadClass("com.nvidia.spark.rapids.RapidsConf") val m = clazz.getDeclaredMethod("getAllConfigsWithDefault") val allConfs: Map[String, Any] = m.invoke(null).asInstanceOf[Map[String, Any]] - val fos: FileOutputStream = new FileOutputStream(outputPath) - withResource(fos) { _ => - val bos: BufferedOutputStream = new BufferedOutputStream(fos) - withResource(bos) { _ => + withResource(new FileOutputStream(outputPath)) { fos => + withResource(new BufferedOutputStream(fos)) { bos => format match { case Format.PLAIN => - val dos: DataOutputStream = new DataOutputStream(bos) - withResource(dos) { _ => - allConfs.foreach({ case (k, v) => + withResource(new DataOutputStream(bos)) { dos => + allConfs.foreach( { case (k, v) => val valStr = v match { case some: Some[_] => some.getOrElse("") case _ => @@ -69,9 +69,6 @@ object DumpDefaultConfigs { }) } case Format.JSON => - import org.json4s.jackson.Serialization.writePretty - import org.json4s.DefaultFormats - import java.nio.charset.StandardCharsets implicit val formats = DefaultFormats bos.write(writePretty(allConfs).getBytes(StandardCharsets.UTF_8)) case _ => diff --git a/scala2.13/integration_tests/pom.xml b/scala2.13/integration_tests/pom.xml index 2ee846d0e70..d403ef8e5e2 100644 --- a/scala2.13/integration_tests/pom.xml +++ b/scala2.13/integration_tests/pom.xml @@ -181,7 +181,7 @@ exec-maven-plugin - clean previously populated config file + clean-previously-populated-config-file verify exec @@ -195,7 +195,7 @@ - populate default configs for testing + populate-default-configs-for-testing verify java @@ -211,7 +211,7 @@ - run pyspark tests + run-pyspark-tests verify exec From 37c0f92c765ada05a6470576708b48e90d1e18be Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 1 Aug 2024 13:36:11 -0700 Subject: [PATCH 03/40] missing version update in 2.13 pom --- scala2.13/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scala2.13/pom.xml b/scala2.13/pom.xml index 88df149ccdd..833282f5904 100644 --- a/scala2.13/pom.xml +++ b/scala2.13/pom.xml @@ -1439,7 +1439,7 @@ This will force full Scala code rebuild in downstream modules. 
org.codehaus.mojo exec-maven-plugin - 3.0.0 + 3.3.0 org.apache.maven.plugins From 85f05f35d956d501eb5ca864e86e09ed84aa3ada Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 1 Aug 2024 13:39:29 -0700 Subject: [PATCH 04/40] fix match arms --- .../com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala index 0b9179471a2..bda989892e0 100644 --- a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala +++ b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala @@ -57,7 +57,8 @@ object DumpDefaultConfigs { withResource(new DataOutputStream(bos)) { dos => allConfs.foreach( { case (k, v) => val valStr = v match { - case some: Some[_] => some.getOrElse("") + case Some(optVal) => optVal.toString + case None => "" case _ => if (v == null) { "" From 6ba7300daa440b706712345e8d44a618cde4f261 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 1 Aug 2024 13:55:39 -0700 Subject: [PATCH 05/40] take the json file path as an input --- integration_tests/conftest.py | 3 +++ integration_tests/run_pyspark_from_build.sh | 2 +- integration_tests/src/main/python/conftest.py | 6 ++++++ integration_tests/src/main/python/spark_session.py | 5 ++--- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/integration_tests/conftest.py b/integration_tests/conftest.py index f2e3435d0c5..3b7e1db15a8 100644 --- a/integration_tests/conftest.py +++ b/integration_tests/conftest.py @@ -62,3 +62,6 @@ def pytest_addoption(parser): parser.addoption( "--pyarrow_test", action='store_true', default=False, help="if enable pyarrow tests" ) + parser.addoption( + "--default_configs_path", action="store", default=None, help="path to a JSON file that stores default configs for integration test" + ) diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh index b3f223d7a46..8c7ecf6b3da 100755 --- a/integration_tests/run_pyspark_from_build.sh +++ b/integration_tests/run_pyspark_from_build.sh @@ -15,7 +15,6 @@ set -ex SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" -export SPARK_RAPIDS_DEFAULT_CONFIGS_PATH=${SCRIPTPATH}/target/spark-rapids-default-configs.json cd "$SCRIPTPATH" if [[ $( echo ${SKIP_TESTS} | tr [:upper:] [:lower:] ) == "true" ]]; @@ -233,6 +232,7 @@ else "$TEST_ARGS" $RUN_TEST_PARAMS --junitxml=TEST-pytest-`date +%s%N`.xml + --default_configs_path="${SCRIPTPATH}/target/spark-rapids-default-configs.json" "$@") NUM_LOCAL_EXECS=${NUM_LOCAL_EXECS:-0} diff --git a/integration_tests/src/main/python/conftest.py b/integration_tests/src/main/python/conftest.py index 49f69f31837..1503c024fe8 100644 --- a/integration_tests/src/main/python/conftest.py +++ b/integration_tests/src/main/python/conftest.py @@ -156,6 +156,10 @@ def is_parquet_testing_tests_forced(): def get_inject_oom_conf(): return _inject_oom +_default_configs_path = None + +def get_default_configs_path(): + return _default_configs_path # For datagen: we expect a seed to be provided by the environment, or default to 0. # Note that tests can override their seed when calling into datagen by setting seed= in their tests. 
@@ -304,6 +308,8 @@ def pytest_configure(config): raise Exception("not supported test type {}".format(test_type)) global _is_parquet_testing_tests_forced _is_parquet_testing_tests_forced = config.getoption("force_parquet_testing_tests") + global _default_configs_path + _default_configs_path = config.getoption("default_configs_path") # For OOM injection: we expect a seed to be provided by the environment, or default to 1. # This is done such that any worker started by the xdist plugin for pytest will diff --git a/integration_tests/src/main/python/spark_session.py b/integration_tests/src/main/python/spark_session.py index 8356b49f48c..f67c3cd52a2 100644 --- a/integration_tests/src/main/python/spark_session.py +++ b/integration_tests/src/main/python/spark_session.py @@ -16,7 +16,7 @@ import calendar, time from datetime import date, datetime from contextlib import contextmanager, ExitStack -from conftest import is_allowing_any_non_gpu, get_non_gpu_allowed, get_validate_execs_in_gpu_plan, is_databricks_runtime, is_at_least_precommit_run, get_inject_oom_conf, is_per_test_ansi_mode_enabled +from conftest import is_allowing_any_non_gpu, get_non_gpu_allowed, get_validate_execs_in_gpu_plan, is_databricks_runtime, is_at_least_precommit_run, get_inject_oom_conf, is_per_test_ansi_mode_enabled, get_default_configs_path from pyspark.sql import DataFrame from pyspark.sql.types import TimestampType, DateType, _acceptable_types from spark_init_internal import get_spark_i_know_what_i_am_doing, spark_version @@ -41,8 +41,7 @@ def _from_scala_map(scala_map): # These settings can be overridden by specific tests if necessary. # Many of these are redundant with default settings for the configs but are set here explicitly # to ensure any cluster settings do not interfere with tests that assume the defaults. 
-conf_file_path = os.environ['SPARK_RAPIDS_DEFAULT_CONFIGS_PATH'] -with open(conf_file_path) as conf_file: +with open(get_default_configs_path()) as conf_file: _default_conf = json.load(conf_file) def _set_all_confs(conf): From 8dbd354cb7f740fcd0cc1bcec2c31dabf4c13b8a Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 1 Aug 2024 14:54:41 -0700 Subject: [PATCH 06/40] add the new config file in the assembly --- integration_tests/pom.xml | 66 +++++++++++---------- integration_tests/run_pyspark_from_build.sh | 4 +- integration_tests/src/assembly/bin.xml | 4 ++ 3 files changed, 42 insertions(+), 32 deletions(-) diff --git a/integration_tests/pom.xml b/integration_tests/pom.xml index d9a1b504247..3d66a26bedc 100644 --- a/integration_tests/pom.xml +++ b/integration_tests/pom.xml @@ -78,6 +78,42 @@ + + org.codehaus.mojo + exec-maven-plugin + + + clean-previously-populated-config-file + package + + exec + + + rm + + -f + ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json + + + + + populate-default-configs-for-testing + package + + java + + + true + com.nvidia.spark.rapids.tests.DumpDefaultConfigs + provided + + json + ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json + + + + + maven-assembly-plugin 3.6.0 @@ -180,36 +216,6 @@ org.codehaus.mojo exec-maven-plugin - - clean-previously-populated-config-file - verify - - exec - - - rm - - -f - ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json - - - - - populate-default-configs-for-testing - verify - - java - - - true - com.nvidia.spark.rapids.tests.DumpDefaultConfigs - provided - - json - ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json - - - run-pyspark-tests verify diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh index 8c7ecf6b3da..4e125a6476b 100755 --- a/integration_tests/run_pyspark_from_build.sh +++ b/integration_tests/run_pyspark_from_build.sh @@ -76,7 +76,7 @@ else # Make sure we have Parquet version >= 1.12 in the dependency LOWEST_PARQUET_JAR=$(echo -e "$MIN_PARQUET_JAR\n$PARQUET_HADOOP_TESTS" | sort -V | head -1) export INCLUDE_PARQUET_HADOOP_TEST_JAR=$([[ "$LOWEST_PARQUET_JAR" == "$MIN_PARQUET_JAR" ]] && echo true || echo false) - PLUGIN_JARS=$(echo "$TARGET_DIR"/../../dist/target/rapids-4-spark_*.jar) + PLUGIN_JARS=$(echo "$TARGET_DIR"/../../dist/rapids-4-spark_*.jar) # the integration-test-spark3xx.jar, should not include the integration-test-spark3xxtest.jar TEST_JARS=$(echo "$TARGET_DIR"/rapids-4-spark-integration-tests*-$INTEGRATION_TEST_VERSION.jar) fi @@ -232,7 +232,7 @@ else "$TEST_ARGS" $RUN_TEST_PARAMS --junitxml=TEST-pytest-`date +%s%N`.xml - --default_configs_path="${SCRIPTPATH}/target/spark-rapids-default-configs.json" + --default_configs_path="${TARGET_DIR}/spark-rapids-default-configs.json" "$@") NUM_LOCAL_EXECS=${NUM_LOCAL_EXECS:-0} diff --git a/integration_tests/src/assembly/bin.xml b/integration_tests/src/assembly/bin.xml index 6209d0b152a..47f5f9e9d52 100644 --- a/integration_tests/src/assembly/bin.xml +++ b/integration_tests/src/assembly/bin.xml @@ -50,6 +50,10 @@ ${project.build.outputDirectory}/rapids4spark-version-info.properties integration_tests + + ${spark.rapids.source.basedir}/integration_tests/target/spark-rapids-default-configs.json + integration_tests + From cb88561a5f14eb97facd9e34a7def587c7f9f21f Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 1 Aug 2024 14:57:36 -0700 Subject: [PATCH 07/40] 
missing 2.13 change --- scala2.13/integration_tests/pom.xml | 66 ++++++++++++++++------------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/scala2.13/integration_tests/pom.xml b/scala2.13/integration_tests/pom.xml index d403ef8e5e2..1115b016e13 100644 --- a/scala2.13/integration_tests/pom.xml +++ b/scala2.13/integration_tests/pom.xml @@ -78,6 +78,42 @@ + + org.codehaus.mojo + exec-maven-plugin + + + clean-previously-populated-config-file + package + + exec + + + rm + + -f + ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json + + + + + populate-default-configs-for-testing + package + + java + + + true + com.nvidia.spark.rapids.tests.DumpDefaultConfigs + provided + + json + ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json + + + + + maven-assembly-plugin 3.6.0 @@ -180,36 +216,6 @@ org.codehaus.mojo exec-maven-plugin - - clean-previously-populated-config-file - verify - - exec - - - rm - - -f - ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json - - - - - populate-default-configs-for-testing - verify - - java - - - true - com.nvidia.spark.rapids.tests.DumpDefaultConfigs - provided - - json - ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json - - - run-pyspark-tests verify From 3a26d1598c3080cd74dcc5f659693aef98d4abfe Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Fri, 2 Aug 2024 10:04:59 -0700 Subject: [PATCH 08/40] use maven build directory var --- integration_tests/src/assembly/bin.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_tests/src/assembly/bin.xml b/integration_tests/src/assembly/bin.xml index 47f5f9e9d52..b46d026ef80 100644 --- a/integration_tests/src/assembly/bin.xml +++ b/integration_tests/src/assembly/bin.xml @@ -51,7 +51,7 @@ integration_tests - ${spark.rapids.source.basedir}/integration_tests/target/spark-rapids-default-configs.json + ${project.build.directory}/spark-rapids-default-configs.json integration_tests From 5d59de7c62865806dbfd1096da2ed1c14894d440 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Fri, 2 Aug 2024 10:07:32 -0700 Subject: [PATCH 09/40] revert unintended change --- integration_tests/run_pyspark_from_build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh index 4e125a6476b..341daa6f838 100755 --- a/integration_tests/run_pyspark_from_build.sh +++ b/integration_tests/run_pyspark_from_build.sh @@ -76,7 +76,7 @@ else # Make sure we have Parquet version >= 1.12 in the dependency LOWEST_PARQUET_JAR=$(echo -e "$MIN_PARQUET_JAR\n$PARQUET_HADOOP_TESTS" | sort -V | head -1) export INCLUDE_PARQUET_HADOOP_TEST_JAR=$([[ "$LOWEST_PARQUET_JAR" == "$MIN_PARQUET_JAR" ]] && echo true || echo false) - PLUGIN_JARS=$(echo "$TARGET_DIR"/../../dist/rapids-4-spark_*.jar) + PLUGIN_JARS=$(echo "$TARGET_DIR"/../../dist/target/rapids-4-spark_*.jar) # the integration-test-spark3xx.jar, should not include the integration-test-spark3xxtest.jar TEST_JARS=$(echo "$TARGET_DIR"/rapids-4-spark-integration-tests*-$INTEGRATION_TEST_VERSION.jar) fi From a7a04d05e6b272873acc798b348bb860fa0ada53 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Fri, 2 Aug 2024 10:10:28 -0700 Subject: [PATCH 10/40] remove unnecessary clean --- integration_tests/pom.xml | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/integration_tests/pom.xml b/integration_tests/pom.xml index 
3d66a26bedc..7d49b09af32 100644 --- a/integration_tests/pom.xml +++ b/integration_tests/pom.xml @@ -82,20 +82,6 @@ org.codehaus.mojo exec-maven-plugin - - clean-previously-populated-config-file - package - - exec - - - rm - - -f - ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json - - - populate-default-configs-for-testing package From a7b50625b493717d6967673c2510cc884fdb231f Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Mon, 29 Jul 2024 14:57:52 -0700 Subject: [PATCH 11/40] Add a new interface to retrieve all configs with their defaults; Add a new stage for integration test to populate default configs Signed-off-by: Jihoon Son --- integration_tests/pom.xml | 30 +++++++ integration_tests/run_pyspark_from_build.sh | 1 + .../src/main/python/spark_session.py | 26 ++---- .../rapids/tests/DumpDefaultConfigs.scala | 83 +++++++++++++++++++ pom.xml | 2 +- scala2.13/integration_tests/pom.xml | 30 +++++++ .../com/nvidia/spark/rapids/RapidsConf.scala | 20 +++++ 7 files changed, 173 insertions(+), 19 deletions(-) create mode 100644 integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala diff --git a/integration_tests/pom.xml b/integration_tests/pom.xml index 3ea20b75610..389261537b8 100644 --- a/integration_tests/pom.xml +++ b/integration_tests/pom.xml @@ -180,6 +180,36 @@ org.codehaus.mojo exec-maven-plugin + + clean previously populated config file + verify + + exec + + + rm + + -f + ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json + + + + + populate default configs for testing + verify + + java + + + true + com.nvidia.spark.rapids.tests.DumpDefaultConfigs + provided + + json + ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json + + + run pyspark tests verify diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh index 22a23349791..b3f223d7a46 100755 --- a/integration_tests/run_pyspark_from_build.sh +++ b/integration_tests/run_pyspark_from_build.sh @@ -15,6 +15,7 @@ set -ex SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +export SPARK_RAPIDS_DEFAULT_CONFIGS_PATH=${SCRIPTPATH}/target/spark-rapids-default-configs.json cd "$SCRIPTPATH" if [[ $( echo ${SKIP_TESTS} | tr [:upper:] [:lower:] ) == "true" ]]; diff --git a/integration_tests/src/main/python/spark_session.py b/integration_tests/src/main/python/spark_session.py index 26388617fff..8356b49f48c 100644 --- a/integration_tests/src/main/python/spark_session.py +++ b/integration_tests/src/main/python/spark_session.py @@ -21,6 +21,7 @@ from pyspark.sql.types import TimestampType, DateType, _acceptable_types from spark_init_internal import get_spark_i_know_what_i_am_doing, spark_version from unittest.mock import patch +import json def _from_scala_map(scala_map): ret = {} @@ -40,23 +41,9 @@ def _from_scala_map(scala_map): # These settings can be overridden by specific tests if necessary. # Many of these are redundant with default settings for the configs but are set here explicitly # to ensure any cluster settings do not interfere with tests that assume the defaults. 
-_default_conf = { - 'spark.rapids.sql.castDecimalToFloat.enabled': 'false', - 'spark.rapids.sql.castFloatToDecimal.enabled': 'false', - 'spark.rapids.sql.castFloatToIntegralTypes.enabled': 'false', - 'spark.rapids.sql.castFloatToString.enabled': 'false', - 'spark.rapids.sql.castStringToFloat.enabled': 'false', - 'spark.rapids.sql.castStringToTimestamp.enabled': 'false', - 'spark.rapids.sql.fast.sample': 'false', - 'spark.rapids.sql.hasExtendedYearValues': 'true', - 'spark.rapids.sql.hashOptimizeSort.enabled': 'false', - 'spark.rapids.sql.improvedFloatOps.enabled': 'false', - 'spark.rapids.sql.incompatibleDateFormats.enabled': 'false', - 'spark.rapids.sql.incompatibleOps.enabled': 'false', - 'spark.rapids.sql.mode': 'executeongpu', - 'spark.rapids.sql.variableFloatAgg.enabled': 'false', - 'spark.sql.legacy.allowNegativeScaleOfDecimal': 'true', -} +conf_file_path = os.environ['SPARK_RAPIDS_DEFAULT_CONFIGS_PATH'] +with open(conf_file_path) as conf_file: + _default_conf = json.load(conf_file) def _set_all_confs(conf): newconf = _default_conf.copy() @@ -67,7 +54,10 @@ def _set_all_confs(conf): newconf.update(conf) for key, value in newconf.items(): if _spark.conf.get(key, None) != value: - _spark.conf.set(key, value) + if isinstance(value, str): + _spark.conf.set(key, value) + else: + _spark.conf.set(key, str(value)) def reset_spark_session_conf(): """Reset all of the configs for a given spark session.""" diff --git a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala new file mode 100644 index 00000000000..57f77247216 --- /dev/null +++ b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.tests + +import java.io.{BufferedOutputStream, DataOutputStream, FileOutputStream} +import java.util.Locale + +import com.nvidia.spark.rapids.Arm.withResource +import com.nvidia.spark.rapids.ShimReflectionUtils + +/** + * Dump all spark-rapids configs with their defaults. + */ +object DumpDefaultConfigs { + + object Format extends Enumeration { + type Format = Value + val PLAIN, JSON = Value + } + + def main(args: Array[String]): Unit = { + if (args.length < 2) { + System.err.println(s"Usage: ${this.getClass.getCanonicalName} {format} {output_path}") + System.exit(1) + } + + val format = Format.withName(args(0).toUpperCase(Locale.US)) + val outputPath = args(1) + + println(s"Dumping all spark-rapids configs and their defaults at ${outputPath}") + + // We use the reflection as RapidsConf should be accessed via the shim layer. 
+ val clazz = ShimReflectionUtils.loadClass("com.nvidia.spark.rapids.RapidsConf") + val m = clazz.getDeclaredMethod("getAllConfigsWithDefault") + val allConfs: Map[String, Any] = m.invoke(null).asInstanceOf[Map[String, Any]] + val fos: FileOutputStream = new FileOutputStream(outputPath) + withResource(fos) { _ => + val bos: BufferedOutputStream = new BufferedOutputStream(fos) + withResource(bos) { _ => + format match { + case Format.PLAIN => + val dos: DataOutputStream = new DataOutputStream(bos) + withResource(dos) { _ => + allConfs.foreach({ case (k, v) => + val valStr = v match { + case some: Some[_] => some.getOrElse("") + case _ => + if (v == null) { + "" + } else { + v.toString + } + } + dos.writeUTF(s"'${k}': '${valStr}',") + }) + } + case Format.JSON => + import org.json4s.jackson.Serialization.writePretty + import org.json4s.DefaultFormats + import java.nio.charset.StandardCharsets + implicit val formats = DefaultFormats + bos.write(writePretty(allConfs).getBytes(StandardCharsets.UTF_8)) + case _ => + System.err.println(s"Unknown format: ${format}") + } + } + } + } +} diff --git a/pom.xml b/pom.xml index 358aa0c52b8..e7a6bb18489 100644 --- a/pom.xml +++ b/pom.xml @@ -1439,7 +1439,7 @@ This will force full Scala code rebuild in downstream modules. org.codehaus.mojo exec-maven-plugin - 3.0.0 + 3.3.0 org.apache.maven.plugins diff --git a/scala2.13/integration_tests/pom.xml b/scala2.13/integration_tests/pom.xml index 4c3ea72f341..ff1da425fbe 100644 --- a/scala2.13/integration_tests/pom.xml +++ b/scala2.13/integration_tests/pom.xml @@ -180,6 +180,36 @@ org.codehaus.mojo exec-maven-plugin + + clean previously populated config file + verify + + exec + + + rm + + -f + ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json + + + + + populate default configs for testing + verify + + java + + + true + com.nvidia.spark.rapids.tests.DumpDefaultConfigs + provided + + json + ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json + + + run pyspark tests verify diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala index 14551471e66..fc25e463f05 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala @@ -124,6 +124,7 @@ abstract class ConfEntry[T](val key: String, val converter: String => T, val doc def get(conf: Map[String, String]): T def get(conf: SQLConf): T + def getDefault(): T def help(asTable: Boolean = false): Unit override def toString: String = key @@ -147,6 +148,10 @@ class ConfEntryWithDefault[T](key: String, converter: String => T, doc: String, } } + override def getDefault(): T = { + defaultValue + } + override def help(asTable: Boolean = false): Unit = { if (!isInternal) { val startupOnlyStr = if (isStartupOnly) "Startup" else "Runtime" @@ -182,6 +187,10 @@ class OptionalConfEntry[T](key: String, val rawConverter: String => T, doc: Stri } } + override def getDefault(): Option[T] = { + None + } + override def help(asTable: Boolean = false): Unit = { if (!isInternal) { val startupOnlyStr = if (isStartupOnly) "Startup" else "Runtime" @@ -2374,6 +2383,17 @@ val SHUFFLE_COMPRESSION_LZ4_CHUNK_SIZE = conf("spark.rapids.shuffle.compression. println("-----|-----------------|-------------|---------------|------") } + /** + * Returns all spark-rapids configs with their default values. 
+ * This function is used to dump default configs, so that they + * could be used by the integration test. + */ + def getAllConfigsWithDefault: Map[String, Any] = { + val allConfs = registeredConfs.clone() + allConfs.append(RapidsPrivateUtil.getPrivateConfigs(): _*) + allConfs.map(e => e.key -> e.getDefault).toMap + } + def help(asTable: Boolean = false): Unit = { helpCommon(asTable) helpAdvanced(asTable) From 44837451f00dd1edd9dc2a2739961d41c212bd6e Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 1 Aug 2024 11:00:46 -0700 Subject: [PATCH 12/40] address comments --- integration_tests/pom.xml | 6 +++--- .../spark/rapids/tests/DumpDefaultConfigs.scala | 17 +++++++---------- scala2.13/integration_tests/pom.xml | 6 +++--- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/integration_tests/pom.xml b/integration_tests/pom.xml index 389261537b8..b4b5465e615 100644 --- a/integration_tests/pom.xml +++ b/integration_tests/pom.xml @@ -181,7 +181,7 @@ exec-maven-plugin - clean previously populated config file + clean-previously-populated-config-file verify exec @@ -195,7 +195,7 @@ - populate default configs for testing + populate-default-configs-for-testing verify java @@ -211,7 +211,7 @@ - run pyspark tests + run-pyspark-tests verify exec diff --git a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala index 57f77247216..0b9179471a2 100644 --- a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala +++ b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala @@ -17,10 +17,13 @@ package com.nvidia.spark.rapids.tests import java.io.{BufferedOutputStream, DataOutputStream, FileOutputStream} +import java.nio.charset.StandardCharsets import java.util.Locale import com.nvidia.spark.rapids.Arm.withResource import com.nvidia.spark.rapids.ShimReflectionUtils +import org.json4s.DefaultFormats +import org.json4s.jackson.Serialization.writePretty /** * Dump all spark-rapids configs with their defaults. 
@@ -47,15 +50,12 @@ object DumpDefaultConfigs { val clazz = ShimReflectionUtils.loadClass("com.nvidia.spark.rapids.RapidsConf") val m = clazz.getDeclaredMethod("getAllConfigsWithDefault") val allConfs: Map[String, Any] = m.invoke(null).asInstanceOf[Map[String, Any]] - val fos: FileOutputStream = new FileOutputStream(outputPath) - withResource(fos) { _ => - val bos: BufferedOutputStream = new BufferedOutputStream(fos) - withResource(bos) { _ => + withResource(new FileOutputStream(outputPath)) { fos => + withResource(new BufferedOutputStream(fos)) { bos => format match { case Format.PLAIN => - val dos: DataOutputStream = new DataOutputStream(bos) - withResource(dos) { _ => - allConfs.foreach({ case (k, v) => + withResource(new DataOutputStream(bos)) { dos => + allConfs.foreach( { case (k, v) => val valStr = v match { case some: Some[_] => some.getOrElse("") case _ => @@ -69,9 +69,6 @@ object DumpDefaultConfigs { }) } case Format.JSON => - import org.json4s.jackson.Serialization.writePretty - import org.json4s.DefaultFormats - import java.nio.charset.StandardCharsets implicit val formats = DefaultFormats bos.write(writePretty(allConfs).getBytes(StandardCharsets.UTF_8)) case _ => diff --git a/scala2.13/integration_tests/pom.xml b/scala2.13/integration_tests/pom.xml index ff1da425fbe..3b099ccb38a 100644 --- a/scala2.13/integration_tests/pom.xml +++ b/scala2.13/integration_tests/pom.xml @@ -181,7 +181,7 @@ exec-maven-plugin - clean previously populated config file + clean-previously-populated-config-file verify exec @@ -195,7 +195,7 @@ - populate default configs for testing + populate-default-configs-for-testing verify java @@ -211,7 +211,7 @@ - run pyspark tests + run-pyspark-tests verify exec From 52fce4cd6d4c4555f5cad3edd2431dae1a8e5328 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 1 Aug 2024 13:36:11 -0700 Subject: [PATCH 13/40] missing version update in 2.13 pom --- scala2.13/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scala2.13/pom.xml b/scala2.13/pom.xml index 1161334a4c0..e0574430ba6 100644 --- a/scala2.13/pom.xml +++ b/scala2.13/pom.xml @@ -1439,7 +1439,7 @@ This will force full Scala code rebuild in downstream modules. 
org.codehaus.mojo exec-maven-plugin - 3.0.0 + 3.3.0 org.apache.maven.plugins From 35735c363440d00aced154c03c4e9d4bc9396db3 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 1 Aug 2024 13:39:29 -0700 Subject: [PATCH 14/40] fix match arms --- .../com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala index 0b9179471a2..bda989892e0 100644 --- a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala +++ b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala @@ -57,7 +57,8 @@ object DumpDefaultConfigs { withResource(new DataOutputStream(bos)) { dos => allConfs.foreach( { case (k, v) => val valStr = v match { - case some: Some[_] => some.getOrElse("") + case Some(optVal) => optVal.toString + case None => "" case _ => if (v == null) { "" From 9f9ef1e36b60c9329e9d64e4ad3aa55f89e7278f Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 1 Aug 2024 13:55:39 -0700 Subject: [PATCH 15/40] take the json file path as an input --- integration_tests/conftest.py | 3 +++ integration_tests/run_pyspark_from_build.sh | 2 +- integration_tests/src/main/python/conftest.py | 6 ++++++ integration_tests/src/main/python/spark_session.py | 5 ++--- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/integration_tests/conftest.py b/integration_tests/conftest.py index f2e3435d0c5..3b7e1db15a8 100644 --- a/integration_tests/conftest.py +++ b/integration_tests/conftest.py @@ -62,3 +62,6 @@ def pytest_addoption(parser): parser.addoption( "--pyarrow_test", action='store_true', default=False, help="if enable pyarrow tests" ) + parser.addoption( + "--default_configs_path", action="store", default=None, help="path to a JSON file that stores default configs for integration test" + ) diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh index b3f223d7a46..8c7ecf6b3da 100755 --- a/integration_tests/run_pyspark_from_build.sh +++ b/integration_tests/run_pyspark_from_build.sh @@ -15,7 +15,6 @@ set -ex SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" -export SPARK_RAPIDS_DEFAULT_CONFIGS_PATH=${SCRIPTPATH}/target/spark-rapids-default-configs.json cd "$SCRIPTPATH" if [[ $( echo ${SKIP_TESTS} | tr [:upper:] [:lower:] ) == "true" ]]; @@ -233,6 +232,7 @@ else "$TEST_ARGS" $RUN_TEST_PARAMS --junitxml=TEST-pytest-`date +%s%N`.xml + --default_configs_path="${SCRIPTPATH}/target/spark-rapids-default-configs.json" "$@") NUM_LOCAL_EXECS=${NUM_LOCAL_EXECS:-0} diff --git a/integration_tests/src/main/python/conftest.py b/integration_tests/src/main/python/conftest.py index 49f69f31837..1503c024fe8 100644 --- a/integration_tests/src/main/python/conftest.py +++ b/integration_tests/src/main/python/conftest.py @@ -156,6 +156,10 @@ def is_parquet_testing_tests_forced(): def get_inject_oom_conf(): return _inject_oom +_default_configs_path = None + +def get_default_configs_path(): + return _default_configs_path # For datagen: we expect a seed to be provided by the environment, or default to 0. # Note that tests can override their seed when calling into datagen by setting seed= in their tests. 
@@ -304,6 +308,8 @@ def pytest_configure(config): raise Exception("not supported test type {}".format(test_type)) global _is_parquet_testing_tests_forced _is_parquet_testing_tests_forced = config.getoption("force_parquet_testing_tests") + global _default_configs_path + _default_configs_path = config.getoption("default_configs_path") # For OOM injection: we expect a seed to be provided by the environment, or default to 1. # This is done such that any worker started by the xdist plugin for pytest will diff --git a/integration_tests/src/main/python/spark_session.py b/integration_tests/src/main/python/spark_session.py index 8356b49f48c..f67c3cd52a2 100644 --- a/integration_tests/src/main/python/spark_session.py +++ b/integration_tests/src/main/python/spark_session.py @@ -16,7 +16,7 @@ import calendar, time from datetime import date, datetime from contextlib import contextmanager, ExitStack -from conftest import is_allowing_any_non_gpu, get_non_gpu_allowed, get_validate_execs_in_gpu_plan, is_databricks_runtime, is_at_least_precommit_run, get_inject_oom_conf, is_per_test_ansi_mode_enabled +from conftest import is_allowing_any_non_gpu, get_non_gpu_allowed, get_validate_execs_in_gpu_plan, is_databricks_runtime, is_at_least_precommit_run, get_inject_oom_conf, is_per_test_ansi_mode_enabled, get_default_configs_path from pyspark.sql import DataFrame from pyspark.sql.types import TimestampType, DateType, _acceptable_types from spark_init_internal import get_spark_i_know_what_i_am_doing, spark_version @@ -41,8 +41,7 @@ def _from_scala_map(scala_map): # These settings can be overridden by specific tests if necessary. # Many of these are redundant with default settings for the configs but are set here explicitly # to ensure any cluster settings do not interfere with tests that assume the defaults. 
-conf_file_path = os.environ['SPARK_RAPIDS_DEFAULT_CONFIGS_PATH'] -with open(conf_file_path) as conf_file: +with open(get_default_configs_path()) as conf_file: _default_conf = json.load(conf_file) def _set_all_confs(conf): From d73581eccc0501d99a0cdf30204d0d72cc460f5c Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 1 Aug 2024 14:54:41 -0700 Subject: [PATCH 16/40] add the new config file in the assembly --- integration_tests/pom.xml | 66 +++++++++++---------- integration_tests/run_pyspark_from_build.sh | 4 +- integration_tests/src/assembly/bin.xml | 4 ++ 3 files changed, 42 insertions(+), 32 deletions(-) diff --git a/integration_tests/pom.xml b/integration_tests/pom.xml index b4b5465e615..073f49cb4e1 100644 --- a/integration_tests/pom.xml +++ b/integration_tests/pom.xml @@ -78,6 +78,42 @@ + + org.codehaus.mojo + exec-maven-plugin + + + clean-previously-populated-config-file + package + + exec + + + rm + + -f + ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json + + + + + populate-default-configs-for-testing + package + + java + + + true + com.nvidia.spark.rapids.tests.DumpDefaultConfigs + provided + + json + ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json + + + + + maven-assembly-plugin 3.6.0 @@ -180,36 +216,6 @@ org.codehaus.mojo exec-maven-plugin - - clean-previously-populated-config-file - verify - - exec - - - rm - - -f - ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json - - - - - populate-default-configs-for-testing - verify - - java - - - true - com.nvidia.spark.rapids.tests.DumpDefaultConfigs - provided - - json - ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json - - - run-pyspark-tests verify diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh index 8c7ecf6b3da..4e125a6476b 100755 --- a/integration_tests/run_pyspark_from_build.sh +++ b/integration_tests/run_pyspark_from_build.sh @@ -76,7 +76,7 @@ else # Make sure we have Parquet version >= 1.12 in the dependency LOWEST_PARQUET_JAR=$(echo -e "$MIN_PARQUET_JAR\n$PARQUET_HADOOP_TESTS" | sort -V | head -1) export INCLUDE_PARQUET_HADOOP_TEST_JAR=$([[ "$LOWEST_PARQUET_JAR" == "$MIN_PARQUET_JAR" ]] && echo true || echo false) - PLUGIN_JARS=$(echo "$TARGET_DIR"/../../dist/target/rapids-4-spark_*.jar) + PLUGIN_JARS=$(echo "$TARGET_DIR"/../../dist/rapids-4-spark_*.jar) # the integration-test-spark3xx.jar, should not include the integration-test-spark3xxtest.jar TEST_JARS=$(echo "$TARGET_DIR"/rapids-4-spark-integration-tests*-$INTEGRATION_TEST_VERSION.jar) fi @@ -232,7 +232,7 @@ else "$TEST_ARGS" $RUN_TEST_PARAMS --junitxml=TEST-pytest-`date +%s%N`.xml - --default_configs_path="${SCRIPTPATH}/target/spark-rapids-default-configs.json" + --default_configs_path="${TARGET_DIR}/spark-rapids-default-configs.json" "$@") NUM_LOCAL_EXECS=${NUM_LOCAL_EXECS:-0} diff --git a/integration_tests/src/assembly/bin.xml b/integration_tests/src/assembly/bin.xml index 6209d0b152a..47f5f9e9d52 100644 --- a/integration_tests/src/assembly/bin.xml +++ b/integration_tests/src/assembly/bin.xml @@ -50,6 +50,10 @@ ${project.build.outputDirectory}/rapids4spark-version-info.properties integration_tests + + ${spark.rapids.source.basedir}/integration_tests/target/spark-rapids-default-configs.json + integration_tests + From 99ea474231efddd408aad285fb5ed44e7500810d Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 1 Aug 2024 14:57:36 -0700 Subject: [PATCH 17/40] 
missing 2.13 change --- scala2.13/integration_tests/pom.xml | 66 ++++++++++++++++------------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/scala2.13/integration_tests/pom.xml b/scala2.13/integration_tests/pom.xml index 3b099ccb38a..5e2ec78cdbe 100644 --- a/scala2.13/integration_tests/pom.xml +++ b/scala2.13/integration_tests/pom.xml @@ -78,6 +78,42 @@ + + org.codehaus.mojo + exec-maven-plugin + + + clean-previously-populated-config-file + package + + exec + + + rm + + -f + ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json + + + + + populate-default-configs-for-testing + package + + java + + + true + com.nvidia.spark.rapids.tests.DumpDefaultConfigs + provided + + json + ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json + + + + + maven-assembly-plugin 3.6.0 @@ -180,36 +216,6 @@ org.codehaus.mojo exec-maven-plugin - - clean-previously-populated-config-file - verify - - exec - - - rm - - -f - ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json - - - - - populate-default-configs-for-testing - verify - - java - - - true - com.nvidia.spark.rapids.tests.DumpDefaultConfigs - provided - - json - ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json - - - run-pyspark-tests verify From 519bb7d4f51c62664b9093fe20457ee91e77d63e Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Fri, 2 Aug 2024 10:04:59 -0700 Subject: [PATCH 18/40] use maven build directory var --- integration_tests/src/assembly/bin.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_tests/src/assembly/bin.xml b/integration_tests/src/assembly/bin.xml index 47f5f9e9d52..b46d026ef80 100644 --- a/integration_tests/src/assembly/bin.xml +++ b/integration_tests/src/assembly/bin.xml @@ -51,7 +51,7 @@ integration_tests - ${spark.rapids.source.basedir}/integration_tests/target/spark-rapids-default-configs.json + ${project.build.directory}/spark-rapids-default-configs.json integration_tests From c3ec423d511c0228a5ea4309410fa1d399ba5cd0 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Fri, 2 Aug 2024 10:07:32 -0700 Subject: [PATCH 19/40] revert unintended change --- integration_tests/run_pyspark_from_build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh index 4e125a6476b..341daa6f838 100755 --- a/integration_tests/run_pyspark_from_build.sh +++ b/integration_tests/run_pyspark_from_build.sh @@ -76,7 +76,7 @@ else # Make sure we have Parquet version >= 1.12 in the dependency LOWEST_PARQUET_JAR=$(echo -e "$MIN_PARQUET_JAR\n$PARQUET_HADOOP_TESTS" | sort -V | head -1) export INCLUDE_PARQUET_HADOOP_TEST_JAR=$([[ "$LOWEST_PARQUET_JAR" == "$MIN_PARQUET_JAR" ]] && echo true || echo false) - PLUGIN_JARS=$(echo "$TARGET_DIR"/../../dist/rapids-4-spark_*.jar) + PLUGIN_JARS=$(echo "$TARGET_DIR"/../../dist/target/rapids-4-spark_*.jar) # the integration-test-spark3xx.jar, should not include the integration-test-spark3xxtest.jar TEST_JARS=$(echo "$TARGET_DIR"/rapids-4-spark-integration-tests*-$INTEGRATION_TEST_VERSION.jar) fi From 076351c158afd75ba366e6e7eeb42533f1573fb1 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Fri, 2 Aug 2024 10:10:28 -0700 Subject: [PATCH 20/40] remove unnecessary clean --- integration_tests/pom.xml | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/integration_tests/pom.xml b/integration_tests/pom.xml index 
073f49cb4e1..ab91639bb87 100644 --- a/integration_tests/pom.xml +++ b/integration_tests/pom.xml @@ -82,20 +82,6 @@ org.codehaus.mojo exec-maven-plugin - - clean-previously-populated-config-file - package - - exec - - - rm - - -f - ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json - - - populate-default-configs-for-testing package From b02faa2788d60916e4734b6e8fc92dff1273d768 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Fri, 2 Aug 2024 16:54:50 -0700 Subject: [PATCH 21/40] Add things in RapidsConf --- dist/pom.xml | 18 ++++ integration_tests/pom.xml | 22 ---- integration_tests/run_pyspark_from_build.sh | 3 +- integration_tests/src/assembly/bin.xml | 2 +- .../rapids/tests/DumpDefaultConfigs.scala | 81 -------------- .../com/nvidia/spark/rapids/RapidsConf.scala | 101 +++++++++++++++--- 6 files changed, 106 insertions(+), 121 deletions(-) delete mode 100644 integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala diff --git a/dist/pom.xml b/dist/pom.xml index abf6232c1a2..730ae5512a3 100644 --- a/dist/pom.xml +++ b/dist/pom.xml @@ -387,6 +387,23 @@ self.log("... OK") + + populate_default_configs_for_testing + package + + run + + + + + + + + + + + + update_config_docs verify @@ -400,6 +417,7 @@ self.log("... OK") + diff --git a/integration_tests/pom.xml b/integration_tests/pom.xml index ab91639bb87..e868bf0b0a2 100644 --- a/integration_tests/pom.xml +++ b/integration_tests/pom.xml @@ -78,28 +78,6 @@ - - org.codehaus.mojo - exec-maven-plugin - - - populate-default-configs-for-testing - package - - java - - - true - com.nvidia.spark.rapids.tests.DumpDefaultConfigs - provided - - json - ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json - - - - - maven-assembly-plugin 3.6.0 diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh index 341daa6f838..02a0efadbd5 100755 --- a/integration_tests/run_pyspark_from_build.sh +++ b/integration_tests/run_pyspark_from_build.sh @@ -223,6 +223,7 @@ else REPORT_CHARS=${REPORT_CHARS:="fE"} # default as (f)ailed, (E)rror STD_INPUT_PATH="$INPUT_PATH"/src/test/resources + DEFAULT_CONFIGS_PATH="${SCRIPTPATH}"/../dist/target/spark-rapids-default-configs.json TEST_COMMON_OPTS=(-v -r"$REPORT_CHARS" "$TEST_TAGS" @@ -232,7 +233,7 @@ else "$TEST_ARGS" $RUN_TEST_PARAMS --junitxml=TEST-pytest-`date +%s%N`.xml - --default_configs_path="${TARGET_DIR}/spark-rapids-default-configs.json" + --default_configs_path="${DEFAULT_CONFIGS_PATH}" "$@") NUM_LOCAL_EXECS=${NUM_LOCAL_EXECS:-0} diff --git a/integration_tests/src/assembly/bin.xml b/integration_tests/src/assembly/bin.xml index b46d026ef80..a0b0bc1f43b 100644 --- a/integration_tests/src/assembly/bin.xml +++ b/integration_tests/src/assembly/bin.xml @@ -51,7 +51,7 @@ integration_tests - ${project.build.directory}/spark-rapids-default-configs.json + ${spark.rapids.source.basedir}/dist/target/spark-rapids-default-configs.json integration_tests diff --git a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala deleted file mode 100644 index bda989892e0..00000000000 --- a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.nvidia.spark.rapids.tests - -import java.io.{BufferedOutputStream, DataOutputStream, FileOutputStream} -import java.nio.charset.StandardCharsets -import java.util.Locale - -import com.nvidia.spark.rapids.Arm.withResource -import com.nvidia.spark.rapids.ShimReflectionUtils -import org.json4s.DefaultFormats -import org.json4s.jackson.Serialization.writePretty - -/** - * Dump all spark-rapids configs with their defaults. - */ -object DumpDefaultConfigs { - - object Format extends Enumeration { - type Format = Value - val PLAIN, JSON = Value - } - - def main(args: Array[String]): Unit = { - if (args.length < 2) { - System.err.println(s"Usage: ${this.getClass.getCanonicalName} {format} {output_path}") - System.exit(1) - } - - val format = Format.withName(args(0).toUpperCase(Locale.US)) - val outputPath = args(1) - - println(s"Dumping all spark-rapids configs and their defaults at ${outputPath}") - - // We use the reflection as RapidsConf should be accessed via the shim layer. - val clazz = ShimReflectionUtils.loadClass("com.nvidia.spark.rapids.RapidsConf") - val m = clazz.getDeclaredMethod("getAllConfigsWithDefault") - val allConfs: Map[String, Any] = m.invoke(null).asInstanceOf[Map[String, Any]] - withResource(new FileOutputStream(outputPath)) { fos => - withResource(new BufferedOutputStream(fos)) { bos => - format match { - case Format.PLAIN => - withResource(new DataOutputStream(bos)) { dos => - allConfs.foreach( { case (k, v) => - val valStr = v match { - case Some(optVal) => optVal.toString - case None => "" - case _ => - if (v == null) { - "" - } else { - v.toString - } - } - dos.writeUTF(s"'${k}': '${valStr}',") - }) - } - case Format.JSON => - implicit val formats = DefaultFormats - bos.write(writePretty(allConfs).getBytes(StandardCharsets.UTF_8)) - case _ => - System.err.println(s"Unknown format: ${format}") - } - } - } - } -} diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala index fc25e463f05..5a3842cece1 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala @@ -15,15 +15,19 @@ */ package com.nvidia.spark.rapids -import java.io.{File, FileOutputStream} +import java.io.{BufferedOutputStream, DataOutputStream, File, FileOutputStream} +import java.nio.charset.StandardCharsets import java.util - -import scala.collection.JavaConverters._ -import scala.collection.mutable.{HashMap, ListBuffer} +import java.util.Locale import ai.rapids.cudf.Cuda +import com.nvidia.spark.rapids.Arm.withResource import com.nvidia.spark.rapids.jni.RmmSpark.OomInjectionType import com.nvidia.spark.rapids.lore.{LoreId, OutputLoreId} +import org.json4s.DefaultFormats +import org.json4s.jackson.Serialization.writePretty +import scala.collection.JavaConverters._ +import scala.collection.mutable.{HashMap, ListBuffer} import 
org.apache.spark.SparkConf import org.apache.spark.internal.Logging @@ -2394,6 +2398,42 @@ val SHUFFLE_COMPRESSION_LZ4_CHUNK_SIZE = conf("spark.rapids.shuffle.compression. allConfs.map(e => e.key -> e.getDefault).toMap } + object Format extends Enumeration { + type Format = Value + val PLAIN, JSON = Value + } + + def dumpAllConfigsWithDefault(format: Format.Value, outputPath: String): Unit = { + val allConfs: Map[String, Any] = RapidsConf.getAllConfigsWithDefault + withResource(new FileOutputStream(outputPath)) { fos => + withResource(new BufferedOutputStream(fos)) { bos => + format match { + case Format.PLAIN => + withResource(new DataOutputStream(bos)) { dos => + allConfs.foreach( { case (k, v) => + val valStr = v match { + case Some(optVal) => optVal.toString + case None => "" + case _ => + if (v == null) { + "" + } else { + v.toString + } + } + dos.writeUTF(s"'${k}': '${valStr}',") + }) + } + case Format.JSON => + implicit val formats = DefaultFormats + bos.write(writePretty(allConfs).getBytes(StandardCharsets.UTF_8)) + case _ => + System.err.println(s"Unknown format: ${format}") + } + } + } + } + def help(asTable: Boolean = false): Unit = { helpCommon(asTable) helpAdvanced(asTable) @@ -2528,19 +2568,48 @@ val SHUFFLE_COMPRESSION_LZ4_CHUNK_SIZE = conf("spark.rapids.shuffle.compression. GpuOverrides.parts.values.toSeq.sortBy(_.tag.toString).foreach(_.confHelp(asTable)) } def main(args: Array[String]): Unit = { - // Include the configs in PythonConfEntries - com.nvidia.spark.rapids.python.PythonConfEntries.init() - val configs = new FileOutputStream(new File(args(0))) - Console.withOut(configs) { - Console.withErr(configs) { - RapidsConf.helpCommon(true) - } + if (args.length < 1) { + System.err.println(s"Usage: ${this.getClass.getCanonicalName} {command} ...") + System.exit(1) } - val advanced = new FileOutputStream(new File(args(1))) - Console.withOut(advanced) { - Console.withErr(advanced) { - RapidsConf.helpAdvanced(true) - } + + val command = args(0) + + command match { + case "help" => + if (args.length < 3) { + System.err.println(s"Usage: ${this.getClass.getCanonicalName} ${command}" + + s" {common_configs_path} {advanced_configs_path}") + System.exit(1) + } + + // Include the configs in PythonConfEntries + com.nvidia.spark.rapids.python.PythonConfEntries.init() + val configs = new FileOutputStream(new File(args(1))) + Console.withOut(configs) { + Console.withErr(configs) { + RapidsConf.helpCommon(true) + } + } + val advanced = new FileOutputStream(new File(args(2))) + Console.withOut(advanced) { + Console.withErr(advanced) { + RapidsConf.helpAdvanced(true) + } + } + case "dump-default-configs" => + if (args.length < 3) { + System.err.println(s"Usage: ${this.getClass.getCanonicalName} ${command}" + + s" {format} {output_path}") + System.exit(1) + } + + val format: RapidsConf.Format.Value = + RapidsConf.Format.withName(args(1).toUpperCase(Locale.US)) + val outputPath = args(2) + + println(s"Dumping all spark-rapids configs and their defaults at ${outputPath}") + RapidsConf.dumpAllConfigsWithDefault(format, outputPath) } } } From 4a3c862a5c6f3aac24be14def44940a2c68163db Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Mon, 5 Aug 2024 14:07:24 -0700 Subject: [PATCH 22/40] missing change for 2.13 --- scala2.13/dist/pom.xml | 18 +++++++++++++++ scala2.13/integration_tests/pom.xml | 36 ----------------------------- 2 files changed, 18 insertions(+), 36 deletions(-) diff --git a/scala2.13/dist/pom.xml b/scala2.13/dist/pom.xml index b9a958493d8..9a7880b763d 100644 --- 
a/scala2.13/dist/pom.xml +++ b/scala2.13/dist/pom.xml @@ -387,6 +387,23 @@ self.log("... OK") + + populate_default_configs_for_testing + package + + run + + + + + + + + + + + + update_config_docs verify @@ -400,6 +417,7 @@ self.log("... OK") + diff --git a/scala2.13/integration_tests/pom.xml b/scala2.13/integration_tests/pom.xml index 5e2ec78cdbe..25d17a3f890 100644 --- a/scala2.13/integration_tests/pom.xml +++ b/scala2.13/integration_tests/pom.xml @@ -78,42 +78,6 @@ - - org.codehaus.mojo - exec-maven-plugin - - - clean-previously-populated-config-file - package - - exec - - - rm - - -f - ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json - - - - - populate-default-configs-for-testing - package - - java - - - true - com.nvidia.spark.rapids.tests.DumpDefaultConfigs - provided - - json - ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json - - - - - maven-assembly-plugin 3.6.0 From 8a7f31692749ecdaa010d5d3a8fb5de8d190f378 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Mon, 5 Aug 2024 15:26:27 -0700 Subject: [PATCH 23/40] fix directory path for scala 2.13 --- integration_tests/src/assembly/bin.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/integration_tests/src/assembly/bin.xml b/integration_tests/src/assembly/bin.xml index a0b0bc1f43b..19f1f1aaba0 100644 --- a/integration_tests/src/assembly/bin.xml +++ b/integration_tests/src/assembly/bin.xml @@ -51,7 +51,8 @@ integration_tests - ${spark.rapids.source.basedir}/dist/target/spark-rapids-default-configs.json + + ${project.basedir}/../dist/target/spark-rapids-default-configs.json integration_tests From ddb145a21f7e93d151c7cfff91d290ae9ee1aa00 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Mon, 5 Aug 2024 16:22:48 -0700 Subject: [PATCH 24/40] exclude jackson from spark-hive --- integration_tests/pom.xml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/integration_tests/pom.xml b/integration_tests/pom.xml index e868bf0b0a2..3f6fe864903 100644 --- a/integration_tests/pom.xml +++ b/integration_tests/pom.xml @@ -68,6 +68,16 @@ org.apache.spark spark-hive_${scala.binary.version} + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + From 0542fdfb501b013f02edda0795ffb14a20b2c038 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Mon, 5 Aug 2024 16:28:22 -0700 Subject: [PATCH 25/40] missing change for 2.13 --- scala2.13/integration_tests/pom.xml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/scala2.13/integration_tests/pom.xml b/scala2.13/integration_tests/pom.xml index 25d17a3f890..6bbd314ce0e 100644 --- a/scala2.13/integration_tests/pom.xml +++ b/scala2.13/integration_tests/pom.xml @@ -68,6 +68,16 @@ org.apache.spark spark-hive_${scala.binary.version} + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + From dab6f76bf45502c627fe00d6ddf3776c759e6e41 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Tue, 6 Aug 2024 11:16:36 -0700 Subject: [PATCH 26/40] exclude old jackson stuff from iceberg --- pom.xml | 14 ++++++++++++++ scala2.13/pom.xml | 14 ++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/pom.xml b/pom.xml index e7a6bb18489..54046364651 100644 --- a/pom.xml +++ b/pom.xml @@ -939,6 +939,20 @@ iceberg-core ${iceberg.version} provided + + + com.fasterxml.jackson.core + jackson-annotations + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + org.apache.spark diff --git 
a/scala2.13/pom.xml b/scala2.13/pom.xml index e0574430ba6..cc33e64bf66 100644 --- a/scala2.13/pom.xml +++ b/scala2.13/pom.xml @@ -939,6 +939,20 @@ iceberg-core ${iceberg.version} provided + + + com.fasterxml.jackson.core + jackson-annotations + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + org.apache.spark From 49a63d60ba73be8721112eaa41d7602554cf8db7 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Tue, 6 Aug 2024 14:35:33 -0700 Subject: [PATCH 27/40] copyrights --- integration_tests/conftest.py | 2 +- integration_tests/src/assembly/bin.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/integration_tests/conftest.py b/integration_tests/conftest.py index 3b7e1db15a8..1587b6591bd 100644 --- a/integration_tests/conftest.py +++ b/integration_tests/conftest.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/integration_tests/src/assembly/bin.xml b/integration_tests/src/assembly/bin.xml index 19f1f1aaba0..05593b1ada6 100644 --- a/integration_tests/src/assembly/bin.xml +++ b/integration_tests/src/assembly/bin.xml @@ -1,6 +1,6 @@ + + + + + + From c02e1c4460cd0697fd4be6342d23e8d6074f5e09 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Tue, 6 Aug 2024 18:26:58 -0700 Subject: [PATCH 29/40] fix config file path --- integration_tests/run_pyspark_from_build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh index 02a0efadbd5..a0e87150a9e 100755 --- a/integration_tests/run_pyspark_from_build.sh +++ b/integration_tests/run_pyspark_from_build.sh @@ -223,7 +223,7 @@ else REPORT_CHARS=${REPORT_CHARS:="fE"} # default as (f)ailed, (E)rror STD_INPUT_PATH="$INPUT_PATH"/src/test/resources - DEFAULT_CONFIGS_PATH="${SCRIPTPATH}"/../dist/target/spark-rapids-default-configs.json + DEFAULT_CONFIGS_PATH="${TARGET_DIR}"/../../dist/target/spark-rapids-default-configs.json TEST_COMMON_OPTS=(-v -r"$REPORT_CHARS" "$TEST_TAGS" From b04484a98c4040c91a84b7869dd55e4f824ab579 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Wed, 7 Aug 2024 02:41:56 -0700 Subject: [PATCH 30/40] move most dump changes to rapids conf - fork generation step with maven.compile.classpath - change to a phase before package Signed-off-by: Gera Shegalov --- integration_tests/pom.xml | 9 ++-- integration_tests/src/assembly/bin.xml | 2 +- .../rapids/tests/DumpDefaultConfigs.scala | 49 +------------------ .../com/nvidia/spark/rapids/RapidsConf.scala | 44 +++++++++++++++++ 4 files changed, 53 insertions(+), 51 deletions(-) diff --git a/integration_tests/pom.xml b/integration_tests/pom.xml index 3e0811ffd8c..f69ee397ec9 100644 --- a/integration_tests/pom.xml +++ b/integration_tests/pom.xml @@ -94,16 +94,19 @@ populate-default-configs-for-testing - package + generate-test-resources run - + - + diff --git a/integration_tests/src/assembly/bin.xml b/integration_tests/src/assembly/bin.xml index 29f9c15fb88..72a118a48cd 100644 --- a/integration_tests/src/assembly/bin.xml +++ b/integration_tests/src/assembly/bin.xml @@ -51,7 +51,7 @@ integration_tests - ${project.build.directory}/spark-rapids-default-configs.json + ${project.build.outputDirectory}/spark-rapids-default-configs.json integration_tests diff --git 
a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala index bda989892e0..20dc1e90244 100644 --- a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala +++ b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala @@ -16,66 +16,21 @@ package com.nvidia.spark.rapids.tests -import java.io.{BufferedOutputStream, DataOutputStream, FileOutputStream} -import java.nio.charset.StandardCharsets -import java.util.Locale +import org.apache.commons.lang3.reflect.MethodUtils -import com.nvidia.spark.rapids.Arm.withResource import com.nvidia.spark.rapids.ShimReflectionUtils -import org.json4s.DefaultFormats -import org.json4s.jackson.Serialization.writePretty /** * Dump all spark-rapids configs with their defaults. */ object DumpDefaultConfigs { - - object Format extends Enumeration { - type Format = Value - val PLAIN, JSON = Value - } - def main(args: Array[String]): Unit = { if (args.length < 2) { System.err.println(s"Usage: ${this.getClass.getCanonicalName} {format} {output_path}") System.exit(1) } - - val format = Format.withName(args(0).toUpperCase(Locale.US)) - val outputPath = args(1) - - println(s"Dumping all spark-rapids configs and their defaults at ${outputPath}") - // We use the reflection as RapidsConf should be accessed via the shim layer. val clazz = ShimReflectionUtils.loadClass("com.nvidia.spark.rapids.RapidsConf") - val m = clazz.getDeclaredMethod("getAllConfigsWithDefault") - val allConfs: Map[String, Any] = m.invoke(null).asInstanceOf[Map[String, Any]] - withResource(new FileOutputStream(outputPath)) { fos => - withResource(new BufferedOutputStream(fos)) { bos => - format match { - case Format.PLAIN => - withResource(new DataOutputStream(bos)) { dos => - allConfs.foreach( { case (k, v) => - val valStr = v match { - case Some(optVal) => optVal.toString - case None => "" - case _ => - if (v == null) { - "" - } else { - v.toString - } - } - dos.writeUTF(s"'${k}': '${valStr}',") - }) - } - case Format.JSON => - implicit val formats = DefaultFormats - bos.write(writePretty(allConfs).getBytes(StandardCharsets.UTF_8)) - case _ => - System.err.println(s"Unknown format: ${format}") - } - } - } + MethodUtils.invokeStaticMethod(clazz, "dumpConfigsWithDefault", args) } } diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala index d7b5782b335..17029d742fd 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala @@ -2576,6 +2576,50 @@ val SHUFFLE_COMPRESSION_LZ4_CHUNK_SIZE = conf("spark.rapids.shuffle.compression. 
RapidsConf.dumpAllConfigsWithDefault(format, outputPath) } } + + object Format extends Enumeration { + type Format = Value + val PLAIN, JSON = Value + } + + def dumpConfigsWithDefault(args: Array[String]): Unit = { + import com.nvidia.spark.rapids.Arm._ + + val format = Format.withName(args(0).toUpperCase(java.util.Locale.US)) + val outputPath = args(1) + + println(s"Dumping all spark-rapids configs and their defaults at ${outputPath}") + + val allConfs = getAllConfigsWithDefault + withResource(new java.io.FileOutputStream(outputPath)) { fos => + withResource(new java.io.BufferedOutputStream(fos)) { bos => + format match { + case Format.PLAIN => + withResource(new java.io.DataOutputStream(bos)) { dos => + allConfs.foreach( { case (k, v) => + val valStr = v match { + case Some(optVal) => optVal.toString + case None => "" + case _ => + if (v == null) { + "" + } else { + v.toString + } + } + dos.writeUTF(s"'${k}': '${valStr}',") + }) + } + case Format.JSON => + implicit val formats = org.json4s.DefaultFormats + bos.write(org.json4s.jackson.Serialization.writePretty(allConfs) + .getBytes(java.nio.charset.StandardCharsets.UTF_8)) + case _ => + System.err.println(s"Unknown format: ${format}") + } + } + } + } } class RapidsConf(conf: Map[String, String]) extends Logging { From 3e59847ead0489a7a0176edbd405f4f9815a1269 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Wed, 7 Aug 2024 11:51:58 -0700 Subject: [PATCH 31/40] clean up after merge --- dist/pom.xml | 18 ----- integration_tests/pom.xml | 9 +-- integration_tests/run_pyspark_from_build.sh | 4 +- integration_tests/src/assembly/bin.xml | 2 +- .../rapids/tests/DumpDefaultConfigs.scala | 3 +- .../com/nvidia/spark/rapids/RapidsConf.scala | 70 ++++++------------- 6 files changed, 28 insertions(+), 78 deletions(-) diff --git a/dist/pom.xml b/dist/pom.xml index 730ae5512a3..abf6232c1a2 100644 --- a/dist/pom.xml +++ b/dist/pom.xml @@ -387,23 +387,6 @@ self.log("... OK") - - populate_default_configs_for_testing - package - - run - - - - - - - - - - - - update_config_docs verify @@ -417,7 +400,6 @@ self.log("... 
OK") - diff --git a/integration_tests/pom.xml b/integration_tests/pom.xml index f69ee397ec9..8829cd2d6ec 100644 --- a/integration_tests/pom.xml +++ b/integration_tests/pom.xml @@ -106,16 +106,9 @@ classpathref="maven.compile.classpath" fork="true"> - + - - - - - - - diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh index 27e71de5922..cd6cc5cef00 100755 --- a/integration_tests/run_pyspark_from_build.sh +++ b/integration_tests/run_pyspark_from_build.sh @@ -223,7 +223,7 @@ else REPORT_CHARS=${REPORT_CHARS:="fE"} # default as (f)ailed, (E)rror STD_INPUT_PATH="$INPUT_PATH"/src/test/resources - DEFAULT_CONFIGS_PATH="${TARGET_DIR}"/../../dist/target/spark-rapids-default-configs.json + DEFAULT_CONFIGS_PATH=${DEFAULT_CONFIGS_PATH:-${TARGET_DIR}/spark-rapids-default-configs.json} TEST_COMMON_OPTS=(-v -r"$REPORT_CHARS" "$TEST_TAGS" @@ -233,7 +233,7 @@ else "$TEST_ARGS" $RUN_TEST_PARAMS --junitxml=TEST-pytest-`date +%s%N`.xml - --default_configs_path="${TARGET_DIR}/spark-rapids-default-configs.json" + --default_configs_path="${DEFAULT_CONFIGS_PATH}" "$@") NUM_LOCAL_EXECS=${NUM_LOCAL_EXECS:-0} diff --git a/integration_tests/src/assembly/bin.xml b/integration_tests/src/assembly/bin.xml index 72a118a48cd..29f9c15fb88 100644 --- a/integration_tests/src/assembly/bin.xml +++ b/integration_tests/src/assembly/bin.xml @@ -51,7 +51,7 @@ integration_tests - ${project.build.outputDirectory}/spark-rapids-default-configs.json + ${project.build.directory}/spark-rapids-default-configs.json integration_tests diff --git a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala index 20dc1e90244..79451ced31b 100644 --- a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala +++ b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala @@ -16,9 +16,8 @@ package com.nvidia.spark.rapids.tests -import org.apache.commons.lang3.reflect.MethodUtils - import com.nvidia.spark.rapids.ShimReflectionUtils +import org.apache.commons.lang3.reflect.MethodUtils /** * Dump all spark-rapids configs with their defaults. diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala index 17029d742fd..718ad4f1c69 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala @@ -21,7 +21,6 @@ import java.util import java.util.Locale import ai.rapids.cudf.Cuda -import com.nvidia.spark.rapids.Arm.withResource import com.nvidia.spark.rapids.jni.RmmSpark.OomInjectionType import com.nvidia.spark.rapids.lore.{LoreId, OutputLoreId} import org.json4s.DefaultFormats @@ -2532,48 +2531,25 @@ val SHUFFLE_COMPRESSION_LZ4_CHUNK_SIZE = conf("spark.rapids.shuffle.compression. 
GpuOverrides.parts.values.toSeq.sortBy(_.tag.toString).foreach(_.confHelp(asTable)) } def main(args: Array[String]): Unit = { - if (args.length < 1) { - System.err.println(s"Usage: ${this.getClass.getCanonicalName} {command} ...") + if (args.length < 2) { + System.err.println(s"Usage: ${this.getClass.getCanonicalName}" + + s" {common_configs_path} {advanced_configs_path}") System.exit(1) } - val command = args(0) - - command match { - case "help" => - if (args.length < 3) { - System.err.println(s"Usage: ${this.getClass.getCanonicalName} ${command}" + - s" {common_configs_path} {advanced_configs_path}") - System.exit(1) - } - - // Include the configs in PythonConfEntries - com.nvidia.spark.rapids.python.PythonConfEntries.init() - val configs = new FileOutputStream(new File(args(1))) - Console.withOut(configs) { - Console.withErr(configs) { - RapidsConf.helpCommon(true) - } - } - val advanced = new FileOutputStream(new File(args(2))) - Console.withOut(advanced) { - Console.withErr(advanced) { - RapidsConf.helpAdvanced(true) - } - } - case "dump-default-configs" => - if (args.length < 3) { - System.err.println(s"Usage: ${this.getClass.getCanonicalName} ${command}" + - s" {format} {output_path}") - System.exit(1) - } - - val format: RapidsConf.Format.Value = - RapidsConf.Format.withName(args(1).toUpperCase(Locale.US)) - val outputPath = args(2) - - println(s"Dumping all spark-rapids configs and their defaults at ${outputPath}") - RapidsConf.dumpAllConfigsWithDefault(format, outputPath) + // Include the configs in PythonConfEntries + com.nvidia.spark.rapids.python.PythonConfEntries.init() + val configs = new FileOutputStream(new File(args(0))) + Console.withOut(configs) { + Console.withErr(configs) { + RapidsConf.helpCommon(true) + } + } + val advanced = new FileOutputStream(new File(args(1))) + Console.withOut(advanced) { + Console.withErr(advanced) { + RapidsConf.helpAdvanced(true) + } } } @@ -2585,17 +2561,17 @@ val SHUFFLE_COMPRESSION_LZ4_CHUNK_SIZE = conf("spark.rapids.shuffle.compression. def dumpConfigsWithDefault(args: Array[String]): Unit = { import com.nvidia.spark.rapids.Arm._ - val format = Format.withName(args(0).toUpperCase(java.util.Locale.US)) + val format = Format.withName(args(0).toUpperCase(Locale.US)) val outputPath = args(1) println(s"Dumping all spark-rapids configs and their defaults at ${outputPath}") val allConfs = getAllConfigsWithDefault - withResource(new java.io.FileOutputStream(outputPath)) { fos => - withResource(new java.io.BufferedOutputStream(fos)) { bos => + withResource(new FileOutputStream(outputPath)) { fos => + withResource(new BufferedOutputStream(fos)) { bos => format match { case Format.PLAIN => - withResource(new java.io.DataOutputStream(bos)) { dos => + withResource(new DataOutputStream(bos)) { dos => allConfs.foreach( { case (k, v) => val valStr = v match { case Some(optVal) => optVal.toString @@ -2611,9 +2587,9 @@ val SHUFFLE_COMPRESSION_LZ4_CHUNK_SIZE = conf("spark.rapids.shuffle.compression. 
}) } case Format.JSON => - implicit val formats = org.json4s.DefaultFormats - bos.write(org.json4s.jackson.Serialization.writePretty(allConfs) - .getBytes(java.nio.charset.StandardCharsets.UTF_8)) + implicit val formats = DefaultFormats + bos.write(writePretty(allConfs) + .getBytes(StandardCharsets.UTF_8)) case _ => System.err.println(s"Unknown format: ${format}") } From fa380fb475e29cc85cc362727114fb4ea468c28d Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Wed, 7 Aug 2024 11:52:26 -0700 Subject: [PATCH 32/40] scala 2.13 --- scala2.13/dist/pom.xml | 18 ------------- scala2.13/integration_tests/pom.xml | 39 +++++++++++------------------ 2 files changed, 14 insertions(+), 43 deletions(-) diff --git a/scala2.13/dist/pom.xml b/scala2.13/dist/pom.xml index 9a7880b763d..b9a958493d8 100644 --- a/scala2.13/dist/pom.xml +++ b/scala2.13/dist/pom.xml @@ -387,23 +387,6 @@ self.log("... OK") - - populate_default_configs_for_testing - package - - run - - - - - - - - - - - - update_config_docs verify @@ -417,7 +400,6 @@ self.log("... OK") - diff --git a/scala2.13/integration_tests/pom.xml b/scala2.13/integration_tests/pom.xml index 7e30a7927e4..6380d0dabb3 100644 --- a/scala2.13/integration_tests/pom.xml +++ b/scala2.13/integration_tests/pom.xml @@ -89,37 +89,26 @@ - org.codehaus.mojo - exec-maven-plugin + org.apache.maven.plugins + maven-antrun-plugin - - clean-previously-populated-config-file - package - - exec - - - rm - - -f - ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json - - - populate-default-configs-for-testing - package + generate-test-resources - java + run - true - com.nvidia.spark.rapids.tests.DumpDefaultConfigs - provided - - json - ${spark.rapids.source.basedir}/${rapids.module}/target/spark-rapids-default-configs.json - + + + + + + + From 64b58d9c6d78007bb92bbb861a0bfee0c4c1b82f Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Wed, 7 Aug 2024 13:06:03 -0700 Subject: [PATCH 33/40] more strict arg check --- .../com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala | 2 +- .../src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala index 79451ced31b..14ea0c02cb0 100644 --- a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala +++ b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala @@ -24,7 +24,7 @@ import org.apache.commons.lang3.reflect.MethodUtils */ object DumpDefaultConfigs { def main(args: Array[String]): Unit = { - if (args.length < 2) { + if (args.length != 2) { System.err.println(s"Usage: ${this.getClass.getCanonicalName} {format} {output_path}") System.exit(1) } diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala index 718ad4f1c69..aa960d75766 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala @@ -2531,7 +2531,7 @@ val SHUFFLE_COMPRESSION_LZ4_CHUNK_SIZE = conf("spark.rapids.shuffle.compression. 
GpuOverrides.parts.values.toSeq.sortBy(_.tag.toString).foreach(_.confHelp(asTable)) } def main(args: Array[String]): Unit = { - if (args.length < 2) { + if (args.length != 2) { System.err.println(s"Usage: ${this.getClass.getCanonicalName}" + s" {common_configs_path} {advanced_configs_path}") System.exit(1) From f33683941de9a5f584eb91c1c25f8b2dcbe1842d Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 8 Aug 2024 10:53:00 -0700 Subject: [PATCH 34/40] unpack ambiguous string arguments --- .../com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala | 2 +- .../main/scala/com/nvidia/spark/rapids/RapidsConf.scala | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala index 14ea0c02cb0..d3328cd42c3 100644 --- a/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala +++ b/integration_tests/src/main/scala/com/nvidia/spark/rapids/tests/DumpDefaultConfigs.scala @@ -30,6 +30,6 @@ object DumpDefaultConfigs { } // We use the reflection as RapidsConf should be accessed via the shim layer. val clazz = ShimReflectionUtils.loadClass("com.nvidia.spark.rapids.RapidsConf") - MethodUtils.invokeStaticMethod(clazz, "dumpConfigsWithDefault", args) + MethodUtils.invokeStaticMethod(clazz, "dumpConfigsWithDefault", args(0), args(1)) } } diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala index aa960d75766..38c966f62d0 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala @@ -2558,11 +2558,10 @@ val SHUFFLE_COMPRESSION_LZ4_CHUNK_SIZE = conf("spark.rapids.shuffle.compression. val PLAIN, JSON = Value } - def dumpConfigsWithDefault(args: Array[String]): Unit = { + def dumpConfigsWithDefault(formatName: String, outputPath: String): Unit = { import com.nvidia.spark.rapids.Arm._ - val format = Format.withName(args(0).toUpperCase(Locale.US)) - val outputPath = args(1) + val format = Format.withName(formatName.toUpperCase(Locale.US)) println(s"Dumping all spark-rapids configs and their defaults at ${outputPath}") @@ -2587,7 +2586,7 @@ val SHUFFLE_COMPRESSION_LZ4_CHUNK_SIZE = conf("spark.rapids.shuffle.compression. 
}) } case Format.JSON => - implicit val formats = DefaultFormats + implicit val formats: DefaultFormats.type = DefaultFormats bos.write(writePretty(allConfs) .getBytes(StandardCharsets.UTF_8)) case _ => From c4ba1a241e6382c39ada1c2067e2d26be38ba893 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Fri, 9 Aug 2024 11:04:12 -0700 Subject: [PATCH 35/40] allow legacy negative scale for decimals for some tests --- integration_tests/src/main/python/hash_aggregate_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py index ea429d4533c..c74b8611475 100644 --- a/integration_tests/src/main/python/hash_aggregate_test.py +++ b/integration_tests/src/main/python/hash_aggregate_test.py @@ -372,6 +372,7 @@ def test_computation_in_grpby_columns(): @pytest.mark.parametrize('data_gen', _init_list_with_decimalbig, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_grpby_sum(data_gen, conf): + conf = copy_and_update(conf, {'spark.sql.legacy.allowNegativeScaleOfDecimal': 'true'}) assert_gpu_and_cpu_are_equal_collect( lambda spark: gen_df(spark, data_gen, length=100).groupby('a').agg(f.sum('b')), conf = conf) @@ -384,6 +385,7 @@ def test_hash_grpby_sum(data_gen, conf): @pytest.mark.parametrize('data_gen', [_grpkey_short_sum_full_decimals, _grpkey_short_sum_full_neg_scale_decimals], ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_grpby_sum_full_decimal(data_gen, conf): + conf = copy_and_update(conf, {'spark.sql.legacy.allowNegativeScaleOfDecimal': 'true'}) assert_gpu_and_cpu_are_equal_collect( lambda spark: gen_df(spark, data_gen, length=100).groupby('a').agg(f.sum('b')), conf = conf) From 45beca389221c8c5535ba6800ba1e4355dcaa6f1 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Fri, 9 Aug 2024 13:38:59 -0700 Subject: [PATCH 36/40] should fork for RapidsConf --- dist/pom.xml | 2 +- scala2.13/dist/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dist/pom.xml b/dist/pom.xml index abf6232c1a2..28fea40ebdf 100644 --- a/dist/pom.xml +++ b/dist/pom.xml @@ -399,7 +399,7 @@ self.log("... OK") - + diff --git a/scala2.13/dist/pom.xml b/scala2.13/dist/pom.xml index b9a958493d8..ebfa0fcf829 100644 --- a/scala2.13/dist/pom.xml +++ b/scala2.13/dist/pom.xml @@ -399,7 +399,7 @@ self.log("... OK") - + From d45dd1e4fe18931ed2591fda9bf22c8df91d6f97 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Fri, 9 Aug 2024 14:09:04 -0700 Subject: [PATCH 37/40] remove System.exit() from RapidsConf.main() --- dist/pom.xml | 2 +- .../src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/dist/pom.xml b/dist/pom.xml index 28fea40ebdf..abf6232c1a2 100644 --- a/dist/pom.xml +++ b/dist/pom.xml @@ -399,7 +399,7 @@ self.log("... OK") - + diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala index 38c966f62d0..494ba34237e 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala @@ -2531,12 +2531,6 @@ val SHUFFLE_COMPRESSION_LZ4_CHUNK_SIZE = conf("spark.rapids.shuffle.compression. 
GpuOverrides.parts.values.toSeq.sortBy(_.tag.toString).foreach(_.confHelp(asTable)) } def main(args: Array[String]): Unit = { - if (args.length != 2) { - System.err.println(s"Usage: ${this.getClass.getCanonicalName}" + - s" {common_configs_path} {advanced_configs_path}") - System.exit(1) - } - // Include the configs in PythonConfEntries com.nvidia.spark.rapids.python.PythonConfEntries.init() val configs = new FileOutputStream(new File(args(0))) From 951c5f3f5ae2e5e06f18c760fc94e67f6535a255 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Fri, 9 Aug 2024 14:16:21 -0700 Subject: [PATCH 38/40] missing change for scala 2.13 --- scala2.13/dist/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scala2.13/dist/pom.xml b/scala2.13/dist/pom.xml index ebfa0fcf829..b9a958493d8 100644 --- a/scala2.13/dist/pom.xml +++ b/scala2.13/dist/pom.xml @@ -399,7 +399,7 @@ self.log("... OK") - + From ab5d8b4553de684d181bf45daeabbe192189d708 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Mon, 12 Aug 2024 14:28:26 -0700 Subject: [PATCH 39/40] Fix more tests to set configs --- .../src/main/python/arithmetic_ops_test.py | 113 +++++++++++------- integration_tests/src/main/python/ast_test.py | 8 +- .../src/main/python/cast_test.py | 27 +++-- .../src/main/python/date_time_test.py | 3 +- .../src/main/python/hash_aggregate_test.py | 11 +- .../src/main/python/string_test.py | 3 +- 6 files changed, 108 insertions(+), 57 deletions(-) diff --git a/integration_tests/src/main/python/arithmetic_ops_test.py b/integration_tests/src/main/python/arithmetic_ops_test.py index d7fd941b97b..18e9bba6a2c 100644 --- a/integration_tests/src/main/python/arithmetic_ops_test.py +++ b/integration_tests/src/main/python/arithmetic_ops_test.py @@ -92,6 +92,8 @@ pytest.param(_decimal_gen_38_0, marks=pytest.mark.skipif( is_spark_330_or_later(), reason='This case overflows in Spark 3.3.0+'))] +allow_neg_scale_conf = {'spark.sql.legacy.allowNegativeScaleOfDecimal': 'true'} + def _get_overflow_df(spark, data, data_type, expr): return spark.createDataFrame( SparkContext.getOrCreate().parallelize([data]), @@ -108,7 +110,8 @@ def test_addition(data_gen): f.lit(-12).cast(data_type) + f.col('b'), f.lit(None).cast(data_type) + f.col('a'), f.col('b') + f.lit(None).cast(data_type), - f.col('a') + f.col('b'))) + f.col('a') + f.col('b')), + conf=allow_neg_scale_conf) # If it will not overflow for multiply it is good for add too @pytest.mark.parametrize('data_gen', _no_overflow_multiply_gens, ids=idfn) @@ -133,7 +136,8 @@ def test_subtraction(data_gen): f.lit(-12).cast(data_type) - f.col('b'), f.lit(None).cast(data_type) - f.col('a'), f.col('b') - f.lit(None).cast(data_type), - f.col('a') - f.col('b'))) + f.col('a') - f.col('b')), + conf=allow_neg_scale_conf) @pytest.mark.parametrize('lhs', [byte_gen, short_gen, int_gen, long_gen, DecimalGen(6, 5), DecimalGen(6, 4), DecimalGen(5, 4), DecimalGen(5, 3), DecimalGen(4, 2), DecimalGen(3, -2), @@ -145,7 +149,8 @@ def test_subtraction(data_gen): @disable_ansi_mode def test_addition_subtraction_mixed(lhs, rhs, addOrSub): assert_gpu_and_cpu_are_equal_collect( - lambda spark : two_col_df(spark, lhs, rhs).selectExpr(f"a {addOrSub} b") + lambda spark : two_col_df(spark, lhs, rhs).selectExpr(f"a {addOrSub} b"), + conf=allow_neg_scale_conf ) # If it will not overflow for multiply it is good for subtract too @@ -178,7 +183,8 @@ def test_multiplication(data_gen): f.lit(None).cast(data_type) * f.col('a'), f.col('b') * f.lit(None).cast(data_type), f.col('a') * f.col('b') - )) + ), + 
conf=allow_neg_scale_conf) @allow_non_gpu('ProjectExec', 'Alias', 'Multiply', 'Cast') @pytest.mark.parametrize('data_gen', _no_overflow_multiply_gens_for_fallback, ids=idfn) @@ -215,16 +221,18 @@ def test_multiplication_ansi_overflow(): def test_multiplication_mixed(lhs, rhs): assert_gpu_and_cpu_are_equal_collect( lambda spark : two_col_df(spark, lhs, rhs).select( - f.col('a') * f.col('b'))) + f.col('a') * f.col('b')), + conf=allow_neg_scale_conf) @approximate_float # we should get the perfectly correct answer for floats except when casting a decimal to a float in some corner cases. @pytest.mark.parametrize('lhs', [float_gen, double_gen], ids=idfn) @pytest.mark.parametrize('rhs', [DecimalGen(6, 3), DecimalGen(10, -2), DecimalGen(15, 3)], ids=idfn) def test_float_multiplication_mixed(lhs, rhs): + conf = copy_and_update(allow_neg_scale_conf, {'spark.rapids.sql.castDecimalToFloat.enabled': 'true'}) assert_gpu_and_cpu_are_equal_collect( lambda spark : two_col_df(spark, lhs, rhs).select( f.col('a') * f.col('b')), - conf={'spark.rapids.sql.castDecimalToFloat.enabled': 'true'}) + conf=conf) @pytest.mark.parametrize('data_gen', [double_gen, decimal_gen_32bit_neg_scale, DecimalGen(6, 3), DecimalGen(5, 5), DecimalGen(6, 0), DecimalGen(7, 4), DecimalGen(15, 0), DecimalGen(18, 0), @@ -238,7 +246,8 @@ def test_division(data_gen): f.lit(-12).cast(data_type) / f.col('b'), f.lit(None).cast(data_type) / f.col('a'), f.col('b') / f.lit(None).cast(data_type), - f.col('a') / f.col('b'))) + f.col('a') / f.col('b')), + conf=allow_neg_scale_conf) @pytest.mark.parametrize('rhs', [byte_gen, short_gen, int_gen, long_gen, DecimalGen(4, 1), DecimalGen(5, 0), DecimalGen(5, 1), DecimalGen(10, 5)], ids=idfn) @pytest.mark.parametrize('lhs', [byte_gen, short_gen, int_gen, long_gen, DecimalGen(5, 3), DecimalGen(4, 2), DecimalGen(1, -2), DecimalGen(16, 1)], ids=idfn) @@ -247,7 +256,8 @@ def test_division_mixed(lhs, rhs): assert_gpu_and_cpu_are_equal_collect( lambda spark : two_col_df(spark, lhs, rhs).select( f.col('a'), f.col('b'), - f.col('a') / f.col('b'))) + f.col('a') / f.col('b')), + conf=allow_neg_scale_conf) # Spark has some problems with some decimal operations where it can try to generate a type that is invalid (scale > precision) which results in an error # instead of increasing the precision. So we have a second test that deals with a few of these use cases @@ -258,7 +268,8 @@ def test_division_mixed_larger_dec(lhs, rhs): assert_gpu_and_cpu_are_equal_collect( lambda spark : two_col_df(spark, lhs, rhs).select( f.col('a'), f.col('b'), - f.col('a') / f.col('b'))) + f.col('a') / f.col('b')), + conf=allow_neg_scale_conf) @disable_ansi_mode def test_special_decimal_division(): @@ -268,7 +279,8 @@ def test_special_decimal_division(): data_gen = DecimalGen(precision, scale) assert_gpu_and_cpu_are_equal_collect( lambda spark : two_col_df(spark, data_gen, data_gen).select( - f.col('a') / f.col('b'))) + f.col('a') / f.col('b')), + conf=allow_neg_scale_conf) @approximate_float # we should get the perfectly correct answer for floats except when casting a decimal to a float in some corner cases. 
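The hunks above repeatedly build a per-test conf by layering extra settings on top of allow_neg_scale_conf with copy_and_update. That helper already exists in the integration-test utilities; the sketch below only illustrates the semantics these call sites rely on (copy the base dict, apply the overrides, later entries win) and is not the project's actual implementation.

    def copy_and_update(conf, *updates):
        # Assumed behavior, inferred from the call sites above: never mutate
        # the caller's dict; apply each update in order so later keys win.
        merged = dict(conf)
        for extra in updates:
            merged.update(extra)
        return merged

    # e.g. the pattern used by the float tests above:
    conf = copy_and_update(allow_neg_scale_conf,
                           {'spark.rapids.sql.castDecimalToFloat.enabled': 'true'})
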
@pytest.mark.parametrize('rhs', [float_gen, double_gen], ids=idfn) @@ -278,7 +290,7 @@ def test_float_division_mixed(lhs, rhs): assert_gpu_and_cpu_are_equal_collect( lambda spark : two_col_df(spark, lhs, rhs).select( f.col('a') / f.col('b')), - conf={'spark.rapids.sql.castDecimalToFloat.enabled': 'true'}) + conf=allow_neg_scale_conf) @pytest.mark.parametrize('data_gen', integral_gens + [ decimal_gen_32bit, decimal_gen_64bit, _decimal_gen_7_7, _decimal_gen_18_3, _decimal_gen_30_2, @@ -301,7 +313,8 @@ def test_int_division(data_gen): def test_int_division_mixed(lhs, rhs): assert_gpu_and_cpu_are_equal_collect( lambda spark : two_col_df(spark, lhs, rhs).selectExpr( - 'a DIV b')) + 'a DIV b'), + conf=allow_neg_scale_conf) @pytest.mark.parametrize('data_gen', _arith_data_gens, ids=idfn) @disable_ansi_mode @@ -313,7 +326,8 @@ def test_mod(data_gen): f.lit(-12).cast(data_type) % f.col('b'), f.lit(None).cast(data_type) % f.col('a'), f.col('b') % f.lit(None).cast(data_type), - f.col('a') % f.col('b'))) + f.col('a') % f.col('b')), + conf=allow_neg_scale_conf) # pmod currently falls back for Decimal(precision=38) # https://github.com/NVIDIA/spark-rapids/issues/6336 @@ -434,7 +448,8 @@ def test_mod_pmod_by_zero_not_ansi(data_gen): @disable_ansi_mode def test_mod_mixed(lhs, rhs): assert_gpu_and_cpu_are_equal_collect( - lambda spark : two_col_df(spark, lhs, rhs).selectExpr(f"a % b")) + lambda spark : two_col_df(spark, lhs, rhs).selectExpr(f"a % b"), + conf=allow_neg_scale_conf) # @pytest.mark.skipif(not is_databricks113_or_later() and not is_spark_340_or_later(), reason="https://github.com/NVIDIA/spark-rapids/issues/8330") @pytest.mark.parametrize('lhs', [DecimalGen(38,0), DecimalGen(37,2), DecimalGen(38,5), DecimalGen(38,-10), DecimalGen(38,7)], ids=idfn) @@ -442,7 +457,8 @@ def test_mod_mixed(lhs, rhs): @disable_ansi_mode def test_mod_mixed_decimal128(lhs, rhs): assert_gpu_and_cpu_are_equal_collect( - lambda spark : two_col_df(spark, lhs, rhs).selectExpr("a", "b", f"a % b")) + lambda spark : two_col_df(spark, lhs, rhs).selectExpr("a", "b", f"a % b"), + conf=allow_neg_scale_conf) # Split into 4 tests to permute https://github.com/NVIDIA/spark-rapids/issues/7553 failures @pytest.mark.parametrize('lhs', [byte_gen, short_gen, int_gen, long_gen], ids=idfn) @@ -461,7 +477,8 @@ def test_pmod_mixed_numeric(lhs, rhs): def test_pmod_mixed_decimal_lhs(lhs, rhs): assert_gpu_fallback_collect( lambda spark : two_col_df(spark, lhs, rhs).selectExpr(f"pmod(a, b)"), - "Pmod") + "Pmod", + conf=allow_neg_scale_conf) @allow_non_gpu("ProjectExec", "Pmod") @pytest.mark.parametrize('lhs', [byte_gen, short_gen, int_gen, long_gen], ids=idfn) @@ -472,7 +489,8 @@ def test_pmod_mixed_decimal_lhs(lhs, rhs): def test_pmod_mixed_decimal_rhs(lhs, rhs): assert_gpu_fallback_collect( lambda spark : two_col_df(spark, lhs, rhs).selectExpr(f"pmod(a, b)"), - "Pmod") + "Pmod", + conf=allow_neg_scale_conf) @allow_non_gpu("ProjectExec", "Pmod") @pytest.mark.parametrize('lhs', [DecimalGen(6, 5), DecimalGen(6, 4), DecimalGen(5, 4), DecimalGen(5, 3), @@ -485,7 +503,8 @@ def test_pmod_mixed_decimal_rhs(lhs, rhs): def test_pmod_mixed_decimal(lhs, rhs): assert_gpu_fallback_collect( lambda spark : two_col_df(spark, lhs, rhs).selectExpr(f"pmod(a, b)"), - "Pmod") + "Pmod", + conf=allow_neg_scale_conf) @pytest.mark.parametrize('data_gen', double_gens, ids=idfn) def test_signum(data_gen): @@ -496,27 +515,31 @@ def test_signum(data_gen): @disable_ansi_mode def test_unary_minus(data_gen): assert_gpu_and_cpu_are_equal_collect( - lambda spark : 
unary_op_df(spark, data_gen).selectExpr('-a')) + lambda spark : unary_op_df(spark, data_gen).selectExpr('-a'), + conf=allow_neg_scale_conf) @pytest.mark.parametrize('data_gen', _arith_decimal_gens_high_precision, ids=idfn) @pytest.mark.skipif(is_scala213(), reason="Apache Spark built with Scala 2.13 produces inconsistent results at high precision (SPARK-45438)") def test_unary_minus_decimal128(data_gen): assert_gpu_and_cpu_are_equal_collect( - lambda spark : unary_op_df(spark, data_gen).selectExpr('-a')) + lambda spark : unary_op_df(spark, data_gen).selectExpr('-a'), + conf=allow_neg_scale_conf) @pytest.mark.parametrize('data_gen', _no_overflow_multiply_gens + [float_gen, double_gen] + _arith_decimal_gens_low_precision, ids=idfn) def test_unary_minus_ansi_no_overflow(data_gen): + conf = copy_and_update(ansi_enabled_conf, allow_neg_scale_conf) assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, data_gen).selectExpr('-a'), - conf=ansi_enabled_conf) + conf=conf) @pytest.mark.parametrize('data_gen', _arith_decimal_gens_high_precision, ids=idfn) @pytest.mark.skipif(is_scala213(), reason="Apache Spark built with Scala 2.13 produces inconsistent results at high precision (SPARK-45438)") def test_unary_minus_ansi_no_overflow_decimal128(data_gen): + conf = copy_and_update(ansi_enabled_conf, allow_neg_scale_conf) assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, data_gen).selectExpr('-a'), - conf=ansi_enabled_conf) + conf=conf) @pytest.mark.parametrize('data_type,value', [ (LongType(), LONG_MIN), @@ -540,34 +563,39 @@ def test_unary_minus_ansi_overflow(data_type, value): @pytest.mark.parametrize('data_gen', _arith_data_gens, ids=idfn) def test_unary_positive(data_gen): assert_gpu_and_cpu_are_equal_collect( - lambda spark : unary_op_df(spark, data_gen).selectExpr('+a')) + lambda spark : unary_op_df(spark, data_gen).selectExpr('+a'), + conf=allow_neg_scale_conf) @pytest.mark.parametrize('data_gen', numeric_gens + _arith_decimal_gens_low_precision, ids=idfn) @disable_ansi_mode def test_abs(data_gen): assert_gpu_and_cpu_are_equal_collect( - lambda spark : unary_op_df(spark, data_gen).selectExpr('abs(a)')) + lambda spark : unary_op_df(spark, data_gen).selectExpr('abs(a)'), + conf=allow_neg_scale_conf) @pytest.mark.parametrize('data_gen', _arith_decimal_gens_high_precision, ids=idfn) @pytest.mark.skipif(is_scala213(), reason="Apache Spark built with Scala 2.13 produces inconsistent results at high precision (SPARK-45438)") def test_abs_decimal128(data_gen): assert_gpu_and_cpu_are_equal_collect( - lambda spark : unary_op_df(spark, data_gen).selectExpr('abs(a)')) + lambda spark : unary_op_df(spark, data_gen).selectExpr('abs(a)'), + conf=allow_neg_scale_conf) # ANSI is ignored for abs prior to 3.2.0, but still okay to test it a little more. 
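A recurring theme in these test changes is that a Spark-side setting such as spark.sql.legacy.allowNegativeScaleOfDecimal is now opted into per test rather than assumed from the session-wide defaults; the generated spark-rapids-default-configs.json produced by RapidsConf.dumpConfigsWithDefault only covers spark.rapids.* entries, so Spark SQL settings have to be set explicitly where a test needs them. The snippet below is illustrative only: the keys are config names that appear elsewhere in this change, while the values are placeholders, since the real file is produced from RapidsConf at build time.

    # Illustrative shape of target/spark-rapids-default-configs.json, expressed
    # as the Python dict it deserializes into. Values here are placeholders,
    # not the actual RapidsConf defaults.
    illustrative_defaults = {
        'spark.rapids.sql.castDecimalToFloat.enabled': False,   # placeholder
        'spark.rapids.sql.castFloatToDecimal.enabled': False,   # placeholder
        # ... one entry per config registered in RapidsConf ...
    }
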
@pytest.mark.parametrize('data_gen', _no_overflow_multiply_gens + [float_gen, double_gen] + _arith_decimal_gens_low_precision, ids=idfn) def test_abs_ansi_no_overflow(data_gen): + conf = copy_and_update(ansi_enabled_conf, allow_neg_scale_conf) assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, data_gen).selectExpr('abs(a)'), - conf=ansi_enabled_conf) + conf=conf) @pytest.mark.parametrize('data_gen', _arith_decimal_gens_high_precision, ids=idfn) @pytest.mark.skipif(is_scala213(), reason="Apache Spark built with Scala 2.13 produces inconsistent results at high precision") def test_abs_ansi_no_overflow_decimal128(data_gen): + conf = copy_and_update(ansi_enabled_conf, allow_neg_scale_conf) assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, data_gen).selectExpr('a','abs(a)'), - conf=ansi_enabled_conf) + conf=conf) # Only run this test for Spark v3.2.0 and later to verify abs will # throw exceptions for overflow when ANSI mode is enabled. @@ -644,11 +672,11 @@ def test_floor_ceil_overflow(data_gen): "pyspark.errors.exceptions.captured.ArithmeticException: [NUMERIC_VALUE_OUT_OF_RANGE.WITH_SUGGESTION]" assert_gpu_and_cpu_error( lambda spark: unary_op_df(spark, data_gen).selectExpr('floor(a)').collect(), - conf={}, + conf=allow_neg_scale_conf, error_message=exception_type) assert_gpu_and_cpu_error( lambda spark: unary_op_df(spark, data_gen).selectExpr('ceil(a)').collect(), - conf={}, + conf=allow_neg_scale_conf, error_message=exception_type) @pytest.mark.parametrize('data_gen', double_gens, ids=idfn) @@ -715,7 +743,8 @@ def test_decimal_bround(data_gen): 'bround(a, -1)', 'bround(a, 1)', 'bround(a, 2)', - 'bround(a, 10)')) + 'bround(a, 10)'), + conf=allow_neg_scale_conf) @incompat @approximate_float @@ -730,7 +759,8 @@ def test_decimal_round(data_gen): 'round(a, -1)', 'round(a, 1)', 'round(a, 2)', - 'round(a, 10)')) + 'round(a, 10)'), + conf=allow_neg_scale_conf) @incompat @@ -866,7 +896,8 @@ def test_cosh(data_gen): @pytest.mark.parametrize('data_gen', double_gens, ids=idfn) def test_acosh(data_gen): assert_gpu_and_cpu_are_equal_collect( - lambda spark : unary_op_df(spark, data_gen).selectExpr('acosh(a)')) + lambda spark : unary_op_df(spark, data_gen).selectExpr('acosh(a)'), + conf={'spark.rapids.sql.improvedFloatOps.enabled': 'false'}) # The default approximate is 1e-6 or 1 in a million # in some cases we need to adjust this because the algorithm is different @@ -876,8 +907,7 @@ def test_acosh(data_gen): @pytest.mark.parametrize('data_gen', [DoubleGen(min_exp=-20, max_exp=20)], ids=idfn) def test_columnar_acosh_improved(data_gen): assert_gpu_and_cpu_are_equal_collect( - lambda spark : unary_op_df(spark, data_gen).selectExpr('acosh(a)'), - {'spark.rapids.sql.improvedFloatOps.enabled': 'true'}) + lambda spark : unary_op_df(spark, data_gen).selectExpr('acosh(a)')) @approximate_float @pytest.mark.parametrize('data_gen', double_gens, ids=idfn) @@ -901,7 +931,8 @@ def test_asin(data_gen): @pytest.mark.parametrize('data_gen', double_gens, ids=idfn) def test_asinh(data_gen): assert_gpu_and_cpu_are_equal_collect( - lambda spark : unary_op_df(spark, data_gen).selectExpr('asinh(a)')) + lambda spark : unary_op_df(spark, data_gen).selectExpr('asinh(a)'), + conf={'spark.rapids.sql.improvedFloatOps.enabled': 'false'}) # The default approximate is 1e-6 or 1 in a million # in some cases we need to adjust this because the algorithm is different @@ -912,7 +943,7 @@ def test_asinh(data_gen): def test_columnar_asinh_improved(data_gen): assert_gpu_and_cpu_are_equal_collect( 
lambda spark : unary_op_df(spark, data_gen).selectExpr('asinh(a)'), - {'spark.rapids.sql.improvedFloatOps.enabled': 'true'}) + conf={'spark.rapids.sql.improvedFloatOps.enabled': 'true'}) @approximate_float @pytest.mark.parametrize('data_gen', double_gens, ids=idfn) @@ -1017,7 +1048,7 @@ def test_columnar_pow(data_gen): def test_least(data_gen): num_cols = 20 s1 = with_cpu_session( - lambda spark: gen_scalar(data_gen, force_no_nulls=not isinstance(data_gen, NullGen))) + lambda spark: gen_scalar(data_gen, force_no_nulls=not isinstance(data_gen, NullGen)), conf=allow_neg_scale_conf) # we want lots of nulls gen = StructGen([('_c' + str(x), data_gen.copy_special_case(None, weight=100.0)) for x in range(0, num_cols)], nullable=False) @@ -1027,13 +1058,14 @@ def test_least(data_gen): data_type = data_gen.data_type assert_gpu_and_cpu_are_equal_collect( lambda spark : gen_df(spark, gen).select( - f.least(*command_args))) + f.least(*command_args)), + conf=allow_neg_scale_conf) @pytest.mark.parametrize('data_gen', all_basic_gens + _arith_decimal_gens, ids=idfn) def test_greatest(data_gen): num_cols = 20 s1 = with_cpu_session( - lambda spark: gen_scalar(data_gen, force_no_nulls=not isinstance(data_gen, NullGen))) + lambda spark: gen_scalar(data_gen, force_no_nulls=not isinstance(data_gen, NullGen)), conf=allow_neg_scale_conf) # we want lots of nulls gen = StructGen([('_c' + str(x), data_gen.copy_special_case(None, weight=100.0)) for x in range(0, num_cols)], nullable=False) @@ -1042,7 +1074,8 @@ def test_greatest(data_gen): data_type = data_gen.data_type assert_gpu_and_cpu_are_equal_collect( lambda spark : gen_df(spark, gen).select( - f.greatest(*command_args))) + f.greatest(*command_args)), + conf=allow_neg_scale_conf) def _test_div_by_zero(ansi_mode, expr, is_lit=False): diff --git a/integration_tests/src/main/python/ast_test.py b/integration_tests/src/main/python/ast_test.py index ed6547a429b..7e9c91e4a47 100644 --- a/integration_tests/src/main/python/ast_test.py +++ b/integration_tests/src/main/python/ast_test.py @@ -191,12 +191,14 @@ def test_atan(data_descr): @approximate_float @pytest.mark.parametrize('data_descr', [(double_gen, False)], ids=idfn) def test_asinh(data_descr): - assert_unary_ast(data_descr, lambda df: df.selectExpr('asinh(a)')) + assert_unary_ast(data_descr, lambda df: df.selectExpr('asinh(a)'), + conf={'spark.rapids.sql.improvedFloatOps.enabled': 'false'}) @approximate_float @pytest.mark.parametrize('data_descr', ast_double_descr, ids=idfn) def test_acosh(data_descr): - assert_unary_ast(data_descr, lambda df: df.selectExpr('acosh(a)')) + assert_unary_ast(data_descr, lambda df: df.selectExpr('acosh(a)'), + conf={'spark.rapids.sql.improvedFloatOps.enabled': 'false'}) @approximate_float @pytest.mark.parametrize('data_descr', ast_double_descr, ids=idfn) @@ -211,7 +213,7 @@ def test_atanh(data_descr): @pytest.mark.parametrize('data_descr', [(DoubleGen(min_exp=-20, max_exp=20), True)], ids=idfn) def test_asinh_improved(data_descr): assert_unary_ast(data_descr, lambda df: df.selectExpr('asinh(a)'), - conf={'spark.rapids.sql.improvedFloatOps.enabled': 'true'}) + conf={'spark.rapids.sql.improvedFloatOps.enabled': 'true'}) # The default approximate is 1e-6 or 1 in a million # in some cases we need to adjust this because the algorithm is different diff --git a/integration_tests/src/main/python/cast_test.py b/integration_tests/src/main/python/cast_test.py index f7784178182..ec22253a9a3 100644 --- a/integration_tests/src/main/python/cast_test.py +++ 
b/integration_tests/src/main/python/cast_test.py @@ -194,7 +194,8 @@ def test_cast_string_timestamp_fallback(): def test_cast_decimal_to(data_gen, to_type): assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, data_gen).select(f.col('a').cast(to_type), f.col('a')), - conf = {'spark.rapids.sql.castDecimalToFloat.enabled': 'true'}) + conf = {'spark.rapids.sql.castDecimalToFloat.enabled': 'true', + 'spark.sql.legacy.allowNegativeScaleOfDecimal': 'true'}) @approximate_float @pytest.mark.parametrize('data_gen', [ @@ -208,7 +209,8 @@ def test_ansi_cast_decimal_to(data_gen, to_type): assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, data_gen).select(f.col('a').cast(to_type), f.col('a')), conf = {'spark.rapids.sql.castDecimalToFloat.enabled': True, - 'spark.sql.ansi.enabled': True}) + 'spark.sql.ansi.enabled': True, + 'spark.sql.legacy.allowNegativeScaleOfDecimal': 'true'}) @datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/10050') @pytest.mark.parametrize('data_gen', [ @@ -228,7 +230,8 @@ def test_ansi_cast_decimal_to(data_gen, to_type): DecimalType(1, -1)], ids=meta_idfn('to:')) def test_cast_decimal_to_decimal(data_gen, to_type): assert_gpu_and_cpu_are_equal_collect( - lambda spark : unary_op_df(spark, data_gen).select(f.col('a').cast(to_type), f.col('a'))) + lambda spark : unary_op_df(spark, data_gen).select(f.col('a').cast(to_type), f.col('a')), + conf={'spark.sql.legacy.allowNegativeScaleOfDecimal': 'true'}) @pytest.mark.parametrize('data_gen', [byte_gen, short_gen, int_gen, long_gen], ids=idfn) @pytest.mark.parametrize('to_type', [ @@ -248,22 +251,26 @@ def test_cast_integral_to_decimal(data_gen, to_type): def test_cast_byte_to_decimal_overflow(): assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, byte_gen).select( - f.col('a').cast(DecimalType(2, -1)))) + f.col('a').cast(DecimalType(2, -1))), + conf={'spark.sql.legacy.allowNegativeScaleOfDecimal': 'true'}) def test_cast_short_to_decimal_overflow(): assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, short_gen).select( - f.col('a').cast(DecimalType(4, -1)))) + f.col('a').cast(DecimalType(4, -1))), + conf={'spark.sql.legacy.allowNegativeScaleOfDecimal': 'true'}) def test_cast_int_to_decimal_overflow(): assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, int_gen).select( - f.col('a').cast(DecimalType(9, -1)))) + f.col('a').cast(DecimalType(9, -1))), + conf={'spark.sql.legacy.allowNegativeScaleOfDecimal': 'true'}) def test_cast_long_to_decimal_overflow(): assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, long_gen).select( - f.col('a').cast(DecimalType(18, -1)))) + f.col('a').cast(DecimalType(18, -1))), + conf={'spark.sql.legacy.allowNegativeScaleOfDecimal': 'true'}) _float_special_cases = [(float("inf"), 5.0), (float("-inf"), 5.0), (float("nan"), 5.0)] @@ -282,7 +289,8 @@ def test_cast_floating_point_to_decimal(data_gen, to_type): assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, data_gen).select( f.col('a'), f.col('a').cast(to_type)), - conf={'spark.rapids.sql.castFloatToDecimal.enabled': 'true'}) + conf={'spark.rapids.sql.castFloatToDecimal.enabled': 'true', + 'spark.sql.legacy.allowNegativeScaleOfDecimal': 'true'}) # casting these types to string should be passed basic_gens_for_cast_to_string = [ByteGen, ShortGen, IntegerGen, LongGen, StringGen, BooleanGen, DateGen, TimestampGen] @@ -439,7 +447,8 @@ def is_neg_dec_scale_bug_version(): def 
test_cast_string_to_negative_scale_decimal(): assert_gpu_and_cpu_are_equal_collect( lambda spark: unary_op_df(spark, StringGen("[0-9]{9}")).select( - f.col('a').cast(DecimalType(8, -3)))) + f.col('a').cast(DecimalType(8, -3))), + conf={'spark.sql.legacy.allowNegativeScaleOfDecimal': 'true'}) @pytest.mark.skipif(is_before_spark_330(), reason="ansi cast throws exception only in 3.3.0+") @pytest.mark.parametrize('type', [DoubleType(), FloatType()], ids=idfn) diff --git a/integration_tests/src/main/python/date_time_test.py b/integration_tests/src/main/python/date_time_test.py index 0c877f00238..34678a2e1ad 100644 --- a/integration_tests/src/main/python/date_time_test.py +++ b/integration_tests/src/main/python/date_time_test.py @@ -622,7 +622,8 @@ def test_unsupported_fallback_to_date(): @allow_non_gpu(*non_utc_allow) def test_timestamp_seconds(data_gen): assert_gpu_and_cpu_are_equal_collect( - lambda spark : unary_op_df(spark, data_gen).selectExpr("timestamp_seconds(a)")) + lambda spark : unary_op_df(spark, data_gen).selectExpr("timestamp_seconds(a)"), + conf={'spark.sql.legacy.allowNegativeScaleOfDecimal': 'true'}) @allow_non_gpu(*non_utc_allow) def test_timestamp_seconds_long_overflow(): diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py index c74b8611475..5f1470d4d9c 100644 --- a/integration_tests/src/main/python/hash_aggregate_test.py +++ b/integration_tests/src/main/python/hash_aggregate_test.py @@ -410,6 +410,7 @@ def test_hash_reduction_sum(data_gen, conf): @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) @datagen_overrides(seed=0, permanent=True, reason='https://github.com/NVIDIA/spark-rapids/issues/9779') def test_hash_reduction_sum_full_decimal(data_gen, conf): + conf = copy_and_update(conf, {'spark.sql.legacy.allowNegativeScaleOfDecimal': 'true'}) assert_gpu_and_cpu_are_equal_collect( lambda spark: unary_op_df(spark, data_gen, length=100).selectExpr("SUM(a)"), conf = conf) @@ -454,7 +455,8 @@ def test_hash_avg_nulls_partial_only(data_gen): @pytest.mark.parametrize('data_gen', _init_list_with_decimalbig, ids=idfn) def test_intersect_all(data_gen): assert_gpu_and_cpu_are_equal_collect( - lambda spark : gen_df(spark, data_gen, length=100).intersectAll(gen_df(spark, data_gen, length=100))) + lambda spark : gen_df(spark, data_gen, length=100).intersectAll(gen_df(spark, data_gen, length=100)), + conf={'spark.sql.legacy.allowNegativeScaleOfDecimal': 'true'}) @approximate_float @ignore_order @@ -463,7 +465,8 @@ def test_intersect_all(data_gen): @pytest.mark.parametrize('data_gen', _init_list_with_decimalbig, ids=idfn) def test_exceptAll(data_gen): assert_gpu_and_cpu_are_equal_collect( - lambda spark : gen_df(spark, data_gen, length=100).exceptAll(gen_df(spark, data_gen, length=100).filter('a != b'))) + lambda spark : gen_df(spark, data_gen, length=100).exceptAll(gen_df(spark, data_gen, length=100).filter('a != b')), + conf={'spark.sql.legacy.allowNegativeScaleOfDecimal': 'true'}) # Spark fails to sort some decimal values due to overflow when calculating the sorting prefix. 
# See https://issues.apache.org/jira/browse/SPARK-40129 @@ -491,6 +494,7 @@ def test_exceptAll(data_gen): @pytest.mark.parametrize('data_gen', _pivot_gens_with_decimals, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_grpby_pivot(data_gen, conf): + conf = copy_and_update(conf, {'spark.sql.legacy.allowNegativeScaleOfDecimal': 'true'}) assert_gpu_and_cpu_are_equal_collect( lambda spark: gen_df(spark, data_gen, length=100) .groupby('a') @@ -1112,7 +1116,8 @@ def test_hash_groupby_typed_imperative_agg_without_gpu_implementation_fallback() non_exist_classes='GpuApproximatePercentile,GpuObjectHashAggregateExec', table_name='table', sql="""select k, - approx_percentile(v, array(0.25, 0.5, 0.75)) from table group by k""") + approx_percentile(v, array(0.25, 0.5, 0.75)) from table group by k""", + conf={'spark.rapids.sql.incompatibleOps.enabled': 'false'}) @approximate_float @ignore_order diff --git a/integration_tests/src/main/python/string_test.py b/integration_tests/src/main/python/string_test.py index 4ae4a827aa0..e9bc9db6462 100644 --- a/integration_tests/src/main/python/string_test.py +++ b/integration_tests/src/main/python/string_test.py @@ -878,7 +878,8 @@ def test_format_number_supported(data_gen): 'format_number(a, 10)', 'format_number(a, 13)', 'format_number(a, 30)', - 'format_number(a, 100)') + 'format_number(a, 100)'), + conf={'spark.sql.legacy.allowNegativeScaleOfDecimal': 'true'} ) format_float_special_vals = [float('nan'), float('inf'), float('-inf'), 0.0, -0.0, From 04d86b8f596daf9a0743894cf165ace55af13007 Mon Sep 17 00:00:00 2001 From: Jihoon Son Date: Thu, 15 Aug 2024 10:25:35 -0700 Subject: [PATCH 40/40] add back explicit configs --- integration_tests/src/main/python/arithmetic_ops_test.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/integration_tests/src/main/python/arithmetic_ops_test.py b/integration_tests/src/main/python/arithmetic_ops_test.py index 18e9bba6a2c..9943d7deb7e 100644 --- a/integration_tests/src/main/python/arithmetic_ops_test.py +++ b/integration_tests/src/main/python/arithmetic_ops_test.py @@ -287,10 +287,11 @@ def test_special_decimal_division(): @pytest.mark.parametrize('lhs', [DecimalGen(5, 3), DecimalGen(4, 2), DecimalGen(1, -2), DecimalGen(16, 1)], ids=idfn) @disable_ansi_mode def test_float_division_mixed(lhs, rhs): + conf = copy_and_update(allow_neg_scale_conf, {'spark.rapids.sql.castDecimalToFloat.enabled': 'true'}) assert_gpu_and_cpu_are_equal_collect( lambda spark : two_col_df(spark, lhs, rhs).select( f.col('a') / f.col('b')), - conf=allow_neg_scale_conf) + conf=conf) @pytest.mark.parametrize('data_gen', integral_gens + [ decimal_gen_32bit, decimal_gen_64bit, _decimal_gen_7_7, _decimal_gen_18_3, _decimal_gen_30_2, @@ -907,7 +908,8 @@ def test_acosh(data_gen): @pytest.mark.parametrize('data_gen', [DoubleGen(min_exp=-20, max_exp=20)], ids=idfn) def test_columnar_acosh_improved(data_gen): assert_gpu_and_cpu_are_equal_collect( - lambda spark : unary_op_df(spark, data_gen).selectExpr('acosh(a)')) + lambda spark : unary_op_df(spark, data_gen).selectExpr('acosh(a)'), + {'spark.rapids.sql.improvedFloatOps.enabled': 'true'}) @approximate_float @pytest.mark.parametrize('data_gen', double_gens, ids=idfn)
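run_pyspark_from_build.sh hands the generated defaults file to pytest through the --default_configs_path option. The conftest.py side of that option is not shown in this change, so the following is only a hypothetical sketch of how such an option is commonly wired up in pytest; the option name comes from the script above, while the fixture name and overall shape are assumptions rather than the project's actual wiring.

    # Hypothetical sketch of consuming --default_configs_path in a pytest
    # conftest.py; not the repository's actual implementation.
    import json
    import pytest

    def pytest_addoption(parser):
        parser.addoption('--default_configs_path', action='store', default=None,
                         help='JSON file mapping spark-rapids configs to their defaults')

    @pytest.fixture(scope='session')
    def default_configs(request):
        path = request.config.getoption('--default_configs_path')
        with open(path) as f:
            return json.load(f)  # keys are config names, values their defaults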