diff --git a/.github/workflows/blossom-ci.yml b/.github/workflows/blossom-ci.yml index 93557017b08..1d7b0ab8e0b 100644 --- a/.github/workflows/blossom-ci.yml +++ b/.github/workflows/blossom-ci.yml @@ -77,7 +77,8 @@ jobs: github.actor == 'Feng-Jiang28' || github.actor == 'SurajAralihalli' || github.actor == 'jihoonson' || - github.actor == 'ustcfy' + github.actor == 'ustcfy' || + github.actor == 'knoguchi22' ) steps: - name: Check if comment is issued by authorized person diff --git a/.github/workflows/mvn-verify-check.yml b/.github/workflows/mvn-verify-check.yml index 86e12a4a32b..0aca7bc3655 100644 --- a/.github/workflows/mvn-verify-check.yml +++ b/.github/workflows/mvn-verify-check.yml @@ -246,12 +246,10 @@ jobs: echo "Generated Scala 2.13 build files don't match what's in repository" exit 1 fi - # change to Scala 2.13 Directory - cd scala2.13 # test command, will retry for 3 times if failed. max_retry=3; delay=30; i=1 while true; do - mvn package \ + mvn package -f scala2.13/ \ -pl integration_tests,tests,tools -am -P 'individual,pre-merge' \ -Dbuildver=${{ matrix.spark-version }} -Dmaven.scalastyle.skip=true \ -Drat.skip=true ${{ env.COMMON_MVN_FLAGS }} && break || { @@ -303,12 +301,10 @@ jobs: echo "Generated Scala 2.13 build files don't match what's in repository" exit 1 fi - # change to Scala 2.13 Directory - cd scala2.13 # test command, will retry for 3 times if failed. max_retry=3; delay=30; i=1 while true; do - mvn verify \ + mvn verify -f scala2.13/ \ -P "individual,pre-merge,source-javadoc" -Dbuildver=${{ matrix.spark-version }} \ ${{ env.COMMON_MVN_FLAGS }} && break || { if [[ $i -le $max_retry ]]; then diff --git a/CHANGELOG.md b/CHANGELOG.md index 2510eba5dfe..c7786d8586a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,162 @@ # Change log -Generated on 2024-10-31 +Generated on 2024-12-08 + +## Release 24.12 + +### Features +||| +|:---|:---| +|[#11630](https://github.com/NVIDIA/spark-rapids/issues/11630)|[FEA] enable from_json and json scan by default| +|[#11709](https://github.com/NVIDIA/spark-rapids/issues/11709)|[FEA] Add support for `MonthsBetween`| +|[#11666](https://github.com/NVIDIA/spark-rapids/issues/11666)|[FEA] support task limit profiling for specified stages| +|[#11662](https://github.com/NVIDIA/spark-rapids/issues/11662)|[FEA] Support Apache Spark 3.4.4| +|[#11657](https://github.com/NVIDIA/spark-rapids/issues/11657)|[FEA] Support format 'yyyyMMdd HH:mm:ss' for legacy mode| +|[#11419](https://github.com/NVIDIA/spark-rapids/issues/11419)|[FEA] Support Spark 3.5.3 release| +|[#11492](https://github.com/NVIDIA/spark-rapids/issues/11492)|[FEA] Update rapids JNI and private dependency version to 24.12.0-SNAPSHOT| +|[#11505](https://github.com/NVIDIA/spark-rapids/issues/11505)|[FEA] Support yyyymmdd format for GetTimestamp for LEGACY mode.| + +### Performance +||| +|:---|:---| +|[#8391](https://github.com/NVIDIA/spark-rapids/issues/8391)|[FEA] Do a hash based re-partition instead of a sort based fallback for hash aggregate| +|[#11560](https://github.com/NVIDIA/spark-rapids/issues/11560)|[FEA] Improve `GpuJsonToStructs` performance| +|[#11458](https://github.com/NVIDIA/spark-rapids/issues/11458)|[FEA] enable prune_columns for from_json| + +### Bugs Fixed +||| +|:---|:---| +|[#11798](https://github.com/NVIDIA/spark-rapids/issues/11798)|[BUG] mismatch CPU and GPU result in test_months_between_first_day[DATAGEN_SEED=1733006411, TZ=Africa/Casablanca]| +|[#11790](https://github.com/NVIDIA/spark-rapids/issues/11790)|[BUG] test_hash_* failed 
"java.util.NoSuchElementException: head of empty list" or "Too many times of repartition, may hit a bug?"| +|[#11643](https://github.com/NVIDIA/spark-rapids/issues/11643)|[BUG] Support AQE with Broadcast Hash Join and DPP on Databricks 14.3| +|[#10910](https://github.com/NVIDIA/spark-rapids/issues/10910)|from_json, when input = empty object, rapids throws an exception.| +|[#10891](https://github.com/NVIDIA/spark-rapids/issues/10891)|Parsing a column containing invalid json into StructureType with schema throws an Exception.| +|[#11741](https://github.com/NVIDIA/spark-rapids/issues/11741)|[BUG] Fix spark400 build due to writeWithV1 return value change| +|[#11533](https://github.com/NVIDIA/spark-rapids/issues/11533)|Fix JSON Matrix tests on Databricks 14.3| +|[#11722](https://github.com/NVIDIA/spark-rapids/issues/11722)|[BUG] Spark 4.0.0 has moved `NullIntolerant` and builds are breaking because they are unable to find it.| +|[#11726](https://github.com/NVIDIA/spark-rapids/issues/11726)|[BUG] Databricks 14.3 nightly deploy fails due to incorrect DB_SHIM_NAME| +|[#11293](https://github.com/NVIDIA/spark-rapids/issues/11293)|[BUG] A user query with from_json failed with "JSON Parser encountered an invalid format at location"| +|[#9592](https://github.com/NVIDIA/spark-rapids/issues/9592)|[BUG][JSON] `from_json` to Map type should produce null for invalid entries| +|[#11715](https://github.com/NVIDIA/spark-rapids/issues/11715)|[BUG] parquet_testing_test.py failed on "AssertionError: GPU and CPU boolean values are different"| +|[#11716](https://github.com/NVIDIA/spark-rapids/issues/11716)|[BUG] delta_lake_write_test.py failed on "AssertionError: GPU and CPU boolean values are different"| +|[#11684](https://github.com/NVIDIA/spark-rapids/issues/11684)|[BUG] 24.12 Precommit fails with wrong number of arguments in `GpuDataSource`| +|[#11168](https://github.com/NVIDIA/spark-rapids/issues/11168)|[BUG] reserve allocation should be displayed when erroring due to lack of memory on startup| +|[#7585](https://github.com/NVIDIA/spark-rapids/issues/7585)|[BUG] [Regexp] Line anchor '$' incorrect matching of unicode line terminators| +|[#11622](https://github.com/NVIDIA/spark-rapids/issues/11622)|[BUG] GPU Parquet scan filter pushdown fails with timestamp/INT96 column| +|[#11646](https://github.com/NVIDIA/spark-rapids/issues/11646)|[BUG] NullPointerException in GpuRand| +|[#10498](https://github.com/NVIDIA/spark-rapids/issues/10498)|[BUG] Unit tests failed: [INTERVAL_ARITHMETIC_OVERFLOW] integer overflow. 
Use 'try_add' to tolerate overflow and return NULL instead| +|[#11659](https://github.com/NVIDIA/spark-rapids/issues/11659)|[BUG] parse_url throws exception if partToExtract is invalid while Spark returns null| +|[#10894](https://github.com/NVIDIA/spark-rapids/issues/10894)|Parsing a column containing a nested structure to json thows an exception| +|[#10895](https://github.com/NVIDIA/spark-rapids/issues/10895)|Converting a column containing a map into json throws an exception| +|[#10896](https://github.com/NVIDIA/spark-rapids/issues/10896)|Converting an column containing an array into json throws an exception| +|[#10915](https://github.com/NVIDIA/spark-rapids/issues/10915)|to_json when converts an array will throw an exception:| +|[#10916](https://github.com/NVIDIA/spark-rapids/issues/10916)|to_json function doesn't support map[string, struct] to json conversion.| +|[#10919](https://github.com/NVIDIA/spark-rapids/issues/10919)|to_json converting map[string, integer] to json, throws an exception| +|[#10920](https://github.com/NVIDIA/spark-rapids/issues/10920)|to_json converting an array with maps throws an exception.| +|[#10921](https://github.com/NVIDIA/spark-rapids/issues/10921)|to_json - array with single map| +|[#10923](https://github.com/NVIDIA/spark-rapids/issues/10923)|[BUG] Spark UT framework: to_json function to convert the array with a single empty row to a JSON string throws an exception.| +|[#10924](https://github.com/NVIDIA/spark-rapids/issues/10924)|[BUG] Spark UT framework: to_json when converts an empty array into json throws an exception. | +|[#11024](https://github.com/NVIDIA/spark-rapids/issues/11024)|Fix tests failures in parquet_write_test.py| +|[#11174](https://github.com/NVIDIA/spark-rapids/issues/11174)|Opcode Suite fails for Scala 2.13.8+ | +|[#10483](https://github.com/NVIDIA/spark-rapids/issues/10483)|[BUG] JsonToStructs fails to parse all empty dicts and invalid lines| +|[#10489](https://github.com/NVIDIA/spark-rapids/issues/10489)|[BUG] from_json does not support input with \n in it.| +|[#10347](https://github.com/NVIDIA/spark-rapids/issues/10347)|[BUG] Failures in Integration Tests on Dataproc Serverless| +|[#11021](https://github.com/NVIDIA/spark-rapids/issues/11021)|Fix tests failures in orc_cast_test.py| +|[#11609](https://github.com/NVIDIA/spark-rapids/issues/11609)|[BUG] test_hash_repartition_long_overflow_ansi_exception failed on 341DB| +|[#11600](https://github.com/NVIDIA/spark-rapids/issues/11600)|[BUG] regex_test failed mismatched cpu and gpu values in UT and IT| +|[#11611](https://github.com/NVIDIA/spark-rapids/issues/11611)|[BUG] Spark 4.0 build failure - value cannotSaveIntervalIntoExternalStorageError is not a member of object org.apache.spark.sql.errors.QueryCompilationErrors| +|[#10922](https://github.com/NVIDIA/spark-rapids/issues/10922)|from_json cannot support line separator in the input string.| +|[#11009](https://github.com/NVIDIA/spark-rapids/issues/11009)|Fix tests failures in cast_test.py| +|[#11572](https://github.com/NVIDIA/spark-rapids/issues/11572)|[BUG] MultiFileReaderThreadPool may flood the console with log messages| + +### PRs +||| +|:---|:---| +|[#11763](https://github.com/NVIDIA/spark-rapids/pull/11763)|Orc writes don't fully support Booleans with nulls | +|[#11802](https://github.com/NVIDIA/spark-rapids/pull/11802)|Fall back to CPU for non-UTC months_between| +|[#11792](https://github.com/NVIDIA/spark-rapids/pull/11792)|[BUG] Fix issue 11790| +|[#11712](https://github.com/NVIDIA/spark-rapids/pull/11712)|repartition-based 
fallback for hash aggregate v3| +|[#11730](https://github.com/NVIDIA/spark-rapids/pull/11730)|Add support for asynchronous writing for parquet| +|[#11750](https://github.com/NVIDIA/spark-rapids/pull/11750)|Fix aqe_test failures on 14.3.| +|[#11753](https://github.com/NVIDIA/spark-rapids/pull/11753)|Enable JSON Scan and from_json by default| +|[#11733](https://github.com/NVIDIA/spark-rapids/pull/11733)|Print out the current attempt object when OOM inside a retry block| +|[#11618](https://github.com/NVIDIA/spark-rapids/pull/11618)|Execute `from_json` with struct schema using `JSONUtils.fromJSONToStructs`| +|[#11725](https://github.com/NVIDIA/spark-rapids/pull/11725)|host watermark metric| +|[#11746](https://github.com/NVIDIA/spark-rapids/pull/11746)|Remove batch size bytes limits| +|[#11723](https://github.com/NVIDIA/spark-rapids/pull/11723)|Add NVIDIA Copyright| +|[#11721](https://github.com/NVIDIA/spark-rapids/pull/11721)|Add a few more JSON tests for MAP| +|[#11744](https://github.com/NVIDIA/spark-rapids/pull/11744)|Do not package the Databricks 14.3 shim into the dist jar [skip ci]| +|[#11724](https://github.com/NVIDIA/spark-rapids/pull/11724)|Integrate with kudo| +|[#11739](https://github.com/NVIDIA/spark-rapids/pull/11739)|Update to Spark 4.0 changing signature of SupportsV1Write.writeWithV1| +|[#11737](https://github.com/NVIDIA/spark-rapids/pull/11737)|Add in support for months_between| +|[#11700](https://github.com/NVIDIA/spark-rapids/pull/11700)|Fix leak with RapidsHostColumnBuilder in GpuUserDefinedFunction| +|[#11727](https://github.com/NVIDIA/spark-rapids/pull/11727)|Widen type promotion for decimals with larger scale in Parquet Read| +|[#11719](https://github.com/NVIDIA/spark-rapids/pull/11719)|Skip `from_json` overflow tests for 14.3| +|[#11708](https://github.com/NVIDIA/spark-rapids/pull/11708)|Support profiling for specific stages on a limited number of tasks| +|[#11731](https://github.com/NVIDIA/spark-rapids/pull/11731)|Add NullIntolerantShim to adapt to Spark 4.0 removing NullIntolerant| +|[#11413](https://github.com/NVIDIA/spark-rapids/pull/11413)|Support multi string contains| +|[#11728](https://github.com/NVIDIA/spark-rapids/pull/11728)|Change Databricks 14.3 shim name to spark350db143 [skip ci]| +|[#11702](https://github.com/NVIDIA/spark-rapids/pull/11702)|Improve JSON scan and `from_json`| +|[#11635](https://github.com/NVIDIA/spark-rapids/pull/11635)|Added Shims for adding Databricks 14.3 Support| +|[#11714](https://github.com/NVIDIA/spark-rapids/pull/11714)|Let AWS Databricks automatically choose an Availability Zone| +|[#11703](https://github.com/NVIDIA/spark-rapids/pull/11703)|Simplify $ transpiling and fix newline character bug| +|[#11707](https://github.com/NVIDIA/spark-rapids/pull/11707)|impalaFile cannot be found by UT framework. 
| +|[#11697](https://github.com/NVIDIA/spark-rapids/pull/11697)|Make delta-lake shim dependencies parametrizable| +|[#11710](https://github.com/NVIDIA/spark-rapids/pull/11710)|Add shim version 344 to LogicalPlanShims.scala| +|[#11706](https://github.com/NVIDIA/spark-rapids/pull/11706)|Add retry support in sub hash join| +|[#11673](https://github.com/NVIDIA/spark-rapids/pull/11673)|Fix Parquet Writer tests on 14.3| +|[#11669](https://github.com/NVIDIA/spark-rapids/pull/11669)|Fix `string_test` for 14.3| +|[#11692](https://github.com/NVIDIA/spark-rapids/pull/11692)|Add Spark 3.4.4 Shim | +|[#11695](https://github.com/NVIDIA/spark-rapids/pull/11695)|Fix spark400 build due to LogicalRelation signature changes| +|[#11689](https://github.com/NVIDIA/spark-rapids/pull/11689)|Update the Maven repository to download Spark JAR files [skip ci]| +|[#11670](https://github.com/NVIDIA/spark-rapids/pull/11670)|Fix `misc_expr_test` for 14.3| +|[#11652](https://github.com/NVIDIA/spark-rapids/pull/11652)|Fix skipping fixed_length_char ORC tests on > 13.3| +|[#11644](https://github.com/NVIDIA/spark-rapids/pull/11644)|Skip AQE-join-DPP tests for 14.3| +|[#11667](https://github.com/NVIDIA/spark-rapids/pull/11667)|Preparation for the coming Kudo support| +|[#11685](https://github.com/NVIDIA/spark-rapids/pull/11685)|Exclude shimplify-generated files from scalastyle| +|[#11282](https://github.com/NVIDIA/spark-rapids/pull/11282)|Reserve allocation should be displayed when erroring due to lack of memory on startup| +|[#11671](https://github.com/NVIDIA/spark-rapids/pull/11671)|Use the new host memory allocation API| +|[#11682](https://github.com/NVIDIA/spark-rapids/pull/11682)|Fix auto merge conflict 11679 [skip ci]| +|[#11663](https://github.com/NVIDIA/spark-rapids/pull/11663)|Simplify Transpilation of $ with Extended Line Separator Support in cuDF Regex| +|[#11672](https://github.com/NVIDIA/spark-rapids/pull/11672)|Fix race condition with Parquet filter pushdown modifying shared hadoop Configuration| +|[#11596](https://github.com/NVIDIA/spark-rapids/pull/11596)|Add a new NVTX range for task GPU ownership| +|[#11664](https://github.com/NVIDIA/spark-rapids/pull/11664)|Fix `orc_write_test.py` for 14.3| +|[#11656](https://github.com/NVIDIA/spark-rapids/pull/11656)|[DOC] update the supported OS in download page [skip ci]| +|[#11665](https://github.com/NVIDIA/spark-rapids/pull/11665)|Generate classes identical up to the shim package name| +|[#11647](https://github.com/NVIDIA/spark-rapids/pull/11647)|Fix a NPE issue in GpuRand| +|[#11658](https://github.com/NVIDIA/spark-rapids/pull/11658)|Support format 'yyyyMMdd HH:mm:ss' for legacy mode| +|[#11661](https://github.com/NVIDIA/spark-rapids/pull/11661)|Support invalid partToExtract for parse_url| +|[#11520](https://github.com/NVIDIA/spark-rapids/pull/11520)|UT adjust override checkScanSchemata & enabling ut of exclude_by_suffix fea.| +|[#11634](https://github.com/NVIDIA/spark-rapids/pull/11634)|Put DF_UDF plugin code into the main uber jar.| +|[#11522](https://github.com/NVIDIA/spark-rapids/pull/11522)|UT adjust test SPARK-26677: negated null-safe equality comparison| +|[#11521](https://github.com/NVIDIA/spark-rapids/pull/11521)|Datetime rebasing issue fixed| +|[#11642](https://github.com/NVIDIA/spark-rapids/pull/11642)|Update to_json to be more generic and fix some bugs| +|[#11615](https://github.com/NVIDIA/spark-rapids/pull/11615)|Spark 4 parquet_writer_test.py fixes| +|[#11623](https://github.com/NVIDIA/spark-rapids/pull/11623)|Fix `collection_ops_test` for 14.3| 
+|[#11553](https://github.com/NVIDIA/spark-rapids/pull/11553)|Fix udf-compiler scala2.13 internal return statements| +|[#11640](https://github.com/NVIDIA/spark-rapids/pull/11640)|Disable date/timestamp types by default when parsing JSON| +|[#11570](https://github.com/NVIDIA/spark-rapids/pull/11570)|Add support for Spark 3.5.3| +|[#11591](https://github.com/NVIDIA/spark-rapids/pull/11591)|Spark UT framework: Read Parquet file generated by parquet-thrift Rapids, UT case adjust.| +|[#11631](https://github.com/NVIDIA/spark-rapids/pull/11631)|Update JSON tests based on a closed/fixed issues| +|[#11617](https://github.com/NVIDIA/spark-rapids/pull/11617)|Quick fix for the build script failure of Scala 2.13 jars [skip ci]| +|[#11614](https://github.com/NVIDIA/spark-rapids/pull/11614)|Ensure repartition overflow test always overflows| +|[#11612](https://github.com/NVIDIA/spark-rapids/pull/11612)|Revert "Disable regex tests to unblock CI (#11606)"| +|[#11597](https://github.com/NVIDIA/spark-rapids/pull/11597)|`install_deps` changes for Databricks 14.3| +|[#11608](https://github.com/NVIDIA/spark-rapids/pull/11608)|Use mvn -f scala2.13/ in the build scripts to build the 2.13 jars| +|[#11610](https://github.com/NVIDIA/spark-rapids/pull/11610)|Change DataSource calendar interval error to fix spark400 build| +|[#11549](https://github.com/NVIDIA/spark-rapids/pull/11549)|Adopt `JSONUtils.concatenateJsonStrings` for concatenating JSON strings| +|[#11595](https://github.com/NVIDIA/spark-rapids/pull/11595)|Remove an unused config shuffle.spillThreads| +|[#11606](https://github.com/NVIDIA/spark-rapids/pull/11606)|Disable regex tests to unblock CI| +|[#11605](https://github.com/NVIDIA/spark-rapids/pull/11605)|Fix auto merge conflict 11604 [skip ci]| +|[#11587](https://github.com/NVIDIA/spark-rapids/pull/11587)|avoid long tail tasks due to PrioritySemaphore, remaing part| +|[#11574](https://github.com/NVIDIA/spark-rapids/pull/11574)|avoid long tail tasks due to PrioritySemaphore| +|[#11559](https://github.com/NVIDIA/spark-rapids/pull/11559)|[Spark 4.0] Address test failures in cast_test.py| +|[#11579](https://github.com/NVIDIA/spark-rapids/pull/11579)|Fix merge conflict with branch-24.10| +|[#11571](https://github.com/NVIDIA/spark-rapids/pull/11571)|Log reconfigure multi-file thread pool only once| +|[#11564](https://github.com/NVIDIA/spark-rapids/pull/11564)|Disk spill metric| +|[#11561](https://github.com/NVIDIA/spark-rapids/pull/11561)|Add in a basic plugin for dataframe UDF support in Apache Spark| +|[#11563](https://github.com/NVIDIA/spark-rapids/pull/11563)|Fix the latest merge conflict in integration tests| +|[#11542](https://github.com/NVIDIA/spark-rapids/pull/11542)|Update rapids JNI and private dependency to 24.12.0-SNAPSHOT [skip ci]| +|[#11493](https://github.com/NVIDIA/spark-rapids/pull/11493)|Support legacy mode for yyyymmdd format| ## Release 24.10 @@ -69,15 +226,21 @@ Generated on 2024-10-31 ### PRs ||| |:---|:---| +|[#11683](https://github.com/NVIDIA/spark-rapids/pull/11683)|[DOC] update download page for 2410 hot fix release [skip ci]| +|[#11680](https://github.com/NVIDIA/spark-rapids/pull/11680)|Update latest changelog [skip ci]| +|[#11678](https://github.com/NVIDIA/spark-rapids/pull/11678)|Update version to 24.10.1-SNAPSHOT [skip ci]| |[#11676](https://github.com/NVIDIA/spark-rapids/pull/11676)| Fix race condition with Parquet filter pushdown modifying shared hadoop Configuration| |[#11626](https://github.com/NVIDIA/spark-rapids/pull/11626)|Update latest changelog [skip ci]| 
|[#11624](https://github.com/NVIDIA/spark-rapids/pull/11624)|Update the download link [skip ci]| |[#11577](https://github.com/NVIDIA/spark-rapids/pull/11577)|Update latest changelog [skip ci]| |[#11576](https://github.com/NVIDIA/spark-rapids/pull/11576)|Update rapids JNI and private dependency to 24.10.0| |[#11582](https://github.com/NVIDIA/spark-rapids/pull/11582)|[DOC] update doc for 24.10 release [skip ci]| +|[#11414](https://github.com/NVIDIA/spark-rapids/pull/11414)|Fix `collection_ops_tests` for Spark 4.0| |[#11588](https://github.com/NVIDIA/spark-rapids/pull/11588)|backport fixes of #11573 to branch 24.10| |[#11569](https://github.com/NVIDIA/spark-rapids/pull/11569)|Have "dump always" dump input files before trying to decode them| +|[#11544](https://github.com/NVIDIA/spark-rapids/pull/11544)|Update test case related to LEACY datetime format to unblock nightly CI| |[#11567](https://github.com/NVIDIA/spark-rapids/pull/11567)|Fix test case unix_timestamp(col, 'yyyyMMdd') failed for Africa/Casablanca timezone and LEGACY mode| +|[#11519](https://github.com/NVIDIA/spark-rapids/pull/11519)|Spark 4: Fix parquet_test.py| |[#11496](https://github.com/NVIDIA/spark-rapids/pull/11496)|Update test now that code is fixed| |[#11548](https://github.com/NVIDIA/spark-rapids/pull/11548)|Fix negative rs. shuffle write time| |[#11545](https://github.com/NVIDIA/spark-rapids/pull/11545)|Update test case related to LEACY datetime format to unblock nightly CI| @@ -157,215 +320,6 @@ Generated on 2024-10-31 |[#11280](https://github.com/NVIDIA/spark-rapids/pull/11280)|Asynchronously copy table data to the host during shuffle| |[#11258](https://github.com/NVIDIA/spark-rapids/pull/11258)|Explicitly disable ANSI mode for ast_test.py| |[#11267](https://github.com/NVIDIA/spark-rapids/pull/11267)|Update the rapids JNI and private dependency version to 24.10.0-SNAPSHOT| -|[#11241](https://github.com/NVIDIA/spark-rapids/pull/11241)|Auto merge PRs to branch-24.10 from branch-24.08 [skip ci]| -|[#11231](https://github.com/NVIDIA/spark-rapids/pull/11231)|Cache dependencies for scala 2.13 [skip ci]| - -## Release 24.08 - -### Features -||| -|:---|:---| -|[#9259](https://github.com/NVIDIA/spark-rapids/issues/9259)|[FEA] Create Spark 4.0.0 shim and build env| -|[#10366](https://github.com/NVIDIA/spark-rapids/issues/10366)|[FEA] It would be nice if we could support Hive-style write bucketing table| -|[#10987](https://github.com/NVIDIA/spark-rapids/issues/10987)|[FEA] Implement lore framework to support all operators.| -|[#11087](https://github.com/NVIDIA/spark-rapids/issues/11087)|[FEA] Support regex pattern with brackets when rewrite to PrefixRange patten in rlike| -|[#22](https://github.com/NVIDIA/spark-rapids/issues/22)|[FEA] Add support for bucketed writes| -|[#9939](https://github.com/NVIDIA/spark-rapids/issues/9939)|[FEA] `GpuInsertIntoHiveTable` supports parquet format| - -### Performance -||| -|:---|:---| -|[#8750](https://github.com/NVIDIA/spark-rapids/issues/8750)|[FEA] Rework GpuSubstringIndex to use cudf::slice_strings| -|[#7404](https://github.com/NVIDIA/spark-rapids/issues/7404)|[FEA] explore a hash agg passthrough on partial aggregates| -|[#10976](https://github.com/NVIDIA/spark-rapids/issues/10976)|Rewrite `pattern1|pattern2|pattern3` to multiple contains in `rlike`| - -### Bugs Fixed -||| -|:---|:---| -|[#11287](https://github.com/NVIDIA/spark-rapids/issues/11287)|[BUG] String split APIs on empty string produce incorrect result| -|[#11270](https://github.com/NVIDIA/spark-rapids/issues/11270)|[BUG] 
test_regexp_replace[DATAGEN_SEED=1722297411, TZ=UTC] hanging there forever in pre-merge CI intermittently| -|[#9682](https://github.com/NVIDIA/spark-rapids/issues/9682)|[BUG] Casting FLOAT64 to DECIMAL(12,7) produces different rows from Apache Spark CPU| -|[#10809](https://github.com/NVIDIA/spark-rapids/issues/10809)|[BUG] cast(9.95 as decimal(3,1)), actual: 9.9, expected: 10.0| -|[#11266](https://github.com/NVIDIA/spark-rapids/issues/11266)|[BUG] test_broadcast_hash_join_constant_keys failed in databricks runtimes| -|[#11243](https://github.com/NVIDIA/spark-rapids/issues/11243)|[BUG] ArrayIndexOutOfBoundsException on a left outer join| -|[#11030](https://github.com/NVIDIA/spark-rapids/issues/11030)|Fix tests failures in string_test.py| -|[#11245](https://github.com/NVIDIA/spark-rapids/issues/11245)|[BUG] mvn verify for the source-javadoc fails and no pre-merge check catches it| -|[#11223](https://github.com/NVIDIA/spark-rapids/issues/11223)|[BUG] Remove unreferenced `CUDF_VER=xxx` in the CI script| -|[#11114](https://github.com/NVIDIA/spark-rapids/issues/11114)|[BUG] Update nightly tests for Scala 2.13 to use JDK 17 only| -|[#11229](https://github.com/NVIDIA/spark-rapids/issues/11229)|[BUG] test_delta_name_column_mapping_no_field_ids fails on Spark | -|[#11031](https://github.com/NVIDIA/spark-rapids/issues/11031)|Fix tests failures in multiple files | -|[#10948](https://github.com/NVIDIA/spark-rapids/issues/10948)|Figure out why `MapFromArrays ` appears in the tests for hive parquet write| -|[#11018](https://github.com/NVIDIA/spark-rapids/issues/11018)|Fix tests failures in hash_aggregate_test.py| -|[#11173](https://github.com/NVIDIA/spark-rapids/issues/11173)|[BUG] The `rs. serialization time` metric is misleading| -|[#11017](https://github.com/NVIDIA/spark-rapids/issues/11017)|Fix tests failures in url_test.py| -|[#11201](https://github.com/NVIDIA/spark-rapids/issues/11201)|[BUG] Delta Lake tables with name mapping can throw exceptions on read| -|[#11175](https://github.com/NVIDIA/spark-rapids/issues/11175)|[BUG] Clean up unused and duplicated 'org/roaringbitmap' folder in the spark3xx shims| -|[#11196](https://github.com/NVIDIA/spark-rapids/issues/11196)|[BUG] pipeline failed due to class not found exception: NoClassDefFoundError: com/nvidia/spark/rapids/GpuScalar| -|[#11189](https://github.com/NVIDIA/spark-rapids/issues/11189)|[BUG] regression in NDS after PR #11170| -|[#11167](https://github.com/NVIDIA/spark-rapids/issues/11167)|[BUG] UnsupportedOperationException during delta write with `optimize()`| -|[#11172](https://github.com/NVIDIA/spark-rapids/issues/11172)|[BUG] `get_json_object` returns wrong output with wildcard path| -|[#11148](https://github.com/NVIDIA/spark-rapids/issues/11148)|[BUG] Integration test `test_write_hive_bucketed_table` fails| -|[#11155](https://github.com/NVIDIA/spark-rapids/issues/11155)|[BUG] ArrayIndexOutOfBoundsException in BatchWithPartitionData.splitColumnarBatch| -|[#11152](https://github.com/NVIDIA/spark-rapids/issues/11152)|[BUG] LORE dumping consumes too much memory.| -|[#11029](https://github.com/NVIDIA/spark-rapids/issues/11029)|Fix tests failures in subquery_test.py| -|[#11150](https://github.com/NVIDIA/spark-rapids/issues/11150)|[BUG] hive_parquet_write_test.py::test_insert_hive_bucketed_table failure| -|[#11070](https://github.com/NVIDIA/spark-rapids/issues/11070)|[BUG] numpy2 fail fastparquet cases: numpy.dtype size changed| -|[#11136](https://github.com/NVIDIA/spark-rapids/issues/11136)|UnaryPositive expression doesn't extend 
UnaryExpression| -|[#11122](https://github.com/NVIDIA/spark-rapids/issues/11122)|[BUG] UT MetricRange failed 651070526 was not less than 1.5E8 in spark313| -|[#11119](https://github.com/NVIDIA/spark-rapids/issues/11119)|[BUG] window_function_test.py::test_window_group_limits_fallback_for_row_number fails in a distributed environment| -|[#11023](https://github.com/NVIDIA/spark-rapids/issues/11023)|Fix tests failures in dpp_test.py| -|[#11026](https://github.com/NVIDIA/spark-rapids/issues/11026)|Fix tests failures in map_test.py| -|[#11020](https://github.com/NVIDIA/spark-rapids/issues/11020)|Fix tests failures in grouping_sets_test.py| -|[#11113](https://github.com/NVIDIA/spark-rapids/issues/11113)|[BUG] Update premerge tests for Scala 2.13 to use JDK 17 only| -|[#11027](https://github.com/NVIDIA/spark-rapids/issues/11027)|Fix tests failures in sort_test.py| -|[#10775](https://github.com/NVIDIA/spark-rapids/issues/10775)|[BUG] Issues found by Spark UT Framework on RapidsStringExpressionsSuite| -|[#11033](https://github.com/NVIDIA/spark-rapids/issues/11033)|[BUG] CICD failed a case: cmp_test.py::test_empty_filter[>]| -|[#11103](https://github.com/NVIDIA/spark-rapids/issues/11103)|[BUG] UCX Shuffle With scala.MatchError | -|[#11007](https://github.com/NVIDIA/spark-rapids/issues/11007)|Fix tests failures in array_test.py| -|[#10801](https://github.com/NVIDIA/spark-rapids/issues/10801)|[BUG] JDK17 nightly build after Spark UT Framework is merged| -|[#11019](https://github.com/NVIDIA/spark-rapids/issues/11019)|Fix tests failures in window_function_test.py| -|[#11063](https://github.com/NVIDIA/spark-rapids/issues/11063)|[BUG] op time for GpuCoalesceBatches is more than actual| -|[#11006](https://github.com/NVIDIA/spark-rapids/issues/11006)|Fix test failures in arithmetic_ops_test.py| -|[#10995](https://github.com/NVIDIA/spark-rapids/issues/10995)|Fallback TimeZoneAwareExpression that only support UTC with zoneId instead of timeZone config| -|[#8652](https://github.com/NVIDIA/spark-rapids/issues/8652)|[BUG] array_item test failures on Spark 3.3.x| -|[#11053](https://github.com/NVIDIA/spark-rapids/issues/11053)|[BUG] Build on Databricks 330 fails| -|[#10925](https://github.com/NVIDIA/spark-rapids/issues/10925)| Concat cannot accept no parameter| -|[#10975](https://github.com/NVIDIA/spark-rapids/issues/10975)|[BUG] regex `^.*literal` cannot be rewritten as `contains(literal)` for multiline strings| -|[#10956](https://github.com/NVIDIA/spark-rapids/issues/10956)|[BUG] hive_parquet_write_test.py: test_write_compressed_parquet_into_hive_table integration test failures| -|[#10772](https://github.com/NVIDIA/spark-rapids/issues/10772)|[BUG] Issues found by Spark UT Framework on RapidsDataFrameAggregateSuite| -|[#10986](https://github.com/NVIDIA/spark-rapids/issues/10986)|[BUG]Cast from string to float using hand-picked values failed in CastOpSuite| -|[#10972](https://github.com/NVIDIA/spark-rapids/issues/10972)|Spark 4.0 compile errors | -|[#10794](https://github.com/NVIDIA/spark-rapids/issues/10794)|[BUG] Incorrect cast of string columns containing various infinity notations with trailing spaces | -|[#10964](https://github.com/NVIDIA/spark-rapids/issues/10964)|[BUG] Improve stability of pre-merge jenkinsfile| -|[#10714](https://github.com/NVIDIA/spark-rapids/issues/10714)|Signature changed for `PythonUDFRunner.writeUDFs` | -|[#10712](https://github.com/NVIDIA/spark-rapids/issues/10712)|[AUDIT] BatchScanExec/DataSourceV2Relation to group splits by join keys if they differ from partition keys| 
-|[#10673](https://github.com/NVIDIA/spark-rapids/issues/10673)|[AUDIT] Rename plan nodes for PythonMapInArrowExec| -|[#10710](https://github.com/NVIDIA/spark-rapids/issues/10710)|[AUDIT] `uncacheTableOrView` changed in CommandUtils | -|[#10711](https://github.com/NVIDIA/spark-rapids/issues/10711)|[AUDIT] Match DataSourceV2ScanExecBase changes to groupPartitions method | -|[#10669](https://github.com/NVIDIA/spark-rapids/issues/10669)|Supporting broadcast of multiple filtering keys in DynamicPruning | - -### PRs -||| -|:---|:---| -|[#11400](https://github.com/NVIDIA/spark-rapids/pull/11400)|[DOC] update notes in download page for the decompressing gzip issue [skip ci]| -|[#11355](https://github.com/NVIDIA/spark-rapids/pull/11355)|Update changelog for the v24.08 release [skip ci]| -|[#11353](https://github.com/NVIDIA/spark-rapids/pull/11353)|Update download doc for v24.08.1 [skip ci]| -|[#11352](https://github.com/NVIDIA/spark-rapids/pull/11352)|Update version to 24.08.1-SNAPSHOT [skip ci]| -|[#11337](https://github.com/NVIDIA/spark-rapids/pull/11337)|Update changelog for the v24.08 release [skip ci]| -|[#11335](https://github.com/NVIDIA/spark-rapids/pull/11335)|Fix Delta Lake truncation of min/max string values| -|[#11304](https://github.com/NVIDIA/spark-rapids/pull/11304)|Update changelog for v24.08.0 release [skip ci]| -|[#11303](https://github.com/NVIDIA/spark-rapids/pull/11303)|Update rapids JNI and private dependency to 24.08.0| -|[#11296](https://github.com/NVIDIA/spark-rapids/pull/11296)|[DOC] update doc for 2408 release [skip CI]| -|[#11309](https://github.com/NVIDIA/spark-rapids/pull/11309)|[Doc ]Update lore doc about the range [skip ci]| -|[#11292](https://github.com/NVIDIA/spark-rapids/pull/11292)|Add work around for string split with empty input.| -|[#11278](https://github.com/NVIDIA/spark-rapids/pull/11278)|Fix formatting of advanced configs doc| -|[#10917](https://github.com/NVIDIA/spark-rapids/pull/10917)|Adopt changes from JNI for casting from float to decimal| -|[#11269](https://github.com/NVIDIA/spark-rapids/pull/11269)|Revert "upgrade ucx to 1.17.0"| -|[#11260](https://github.com/NVIDIA/spark-rapids/pull/11260)|Mitigate intermittent test_buckets and shuffle_smoke_test OOM issue| -|[#11268](https://github.com/NVIDIA/spark-rapids/pull/11268)|Fix degenerate conditional nested loop join detection| -|[#11244](https://github.com/NVIDIA/spark-rapids/pull/11244)|Fix ArrayIndexOutOfBoundsException on join counts with constant join keys| -|[#11259](https://github.com/NVIDIA/spark-rapids/pull/11259)|CI Docker to support integration tests with Rocky OS + jdk17 [skip ci]| -|[#11247](https://github.com/NVIDIA/spark-rapids/pull/11247)|Fix `string_test.py` errors on Spark 4.0| -|[#11246](https://github.com/NVIDIA/spark-rapids/pull/11246)|Rework Maven Source Plugin Skip| -|[#11149](https://github.com/NVIDIA/spark-rapids/pull/11149)|Rework on substring index| -|[#11236](https://github.com/NVIDIA/spark-rapids/pull/11236)|Remove the unused vars from the version-def CI script| -|[#11237](https://github.com/NVIDIA/spark-rapids/pull/11237)|Fork jvm for maven-source-plugin| -|[#11200](https://github.com/NVIDIA/spark-rapids/pull/11200)|Multi-get_json_object| -|[#11230](https://github.com/NVIDIA/spark-rapids/pull/11230)|Skip test where Delta Lake may not be fully compatible with Spark| -|[#11220](https://github.com/NVIDIA/spark-rapids/pull/11220)|Avoid failing spark bug SPARK-44242 while generate run_dir| -|[#11226](https://github.com/NVIDIA/spark-rapids/pull/11226)|Fix auto merge conflict 11212| 
-|[#11129](https://github.com/NVIDIA/spark-rapids/pull/11129)|Spark 4: Fix miscellaneous tests including logic, repart, hive_delimited.| -|[#11163](https://github.com/NVIDIA/spark-rapids/pull/11163)|Support `MapFromArrays` on GPU| -|[#11219](https://github.com/NVIDIA/spark-rapids/pull/11219)|Fix hash_aggregate_test.py to run with ANSI enabled| -|[#11186](https://github.com/NVIDIA/spark-rapids/pull/11186)|from_json Json to Struct Exception Logging| -|[#11180](https://github.com/NVIDIA/spark-rapids/pull/11180)|More accurate estimation for the result serialization time in RapidsShuffleThreadedWriterBase| -|[#11194](https://github.com/NVIDIA/spark-rapids/pull/11194)|Fix ANSI mode test failures in url_test.py| -|[#11202](https://github.com/NVIDIA/spark-rapids/pull/11202)|Fix read from Delta Lake table with name column mapping and missing Parquet IDs| -|[#11185](https://github.com/NVIDIA/spark-rapids/pull/11185)|Fix multi-release jar problem| -|[#11144](https://github.com/NVIDIA/spark-rapids/pull/11144)|Build the Scala2.13 dist jar with JDK17| -|[#11197](https://github.com/NVIDIA/spark-rapids/pull/11197)|Fix class not found error: com/nvidia/spark/rapids/GpuScalar| -|[#11191](https://github.com/NVIDIA/spark-rapids/pull/11191)|Fix dynamic pruning regression in GpuFileSourceScanExec| -|[#10994](https://github.com/NVIDIA/spark-rapids/pull/10994)|Add Spark 4.0.0 Build Profile and Other Supporting Changes| -|[#11192](https://github.com/NVIDIA/spark-rapids/pull/11192)|Append new authorized user to blossom-ci whitelist [skip ci]| -|[#11179](https://github.com/NVIDIA/spark-rapids/pull/11179)|Allow more expressions to be tiered| -|[#11141](https://github.com/NVIDIA/spark-rapids/pull/11141)|Enable some Rapids config in RapidsSQLTestsBaseTrait for Spark UT| -|[#11170](https://github.com/NVIDIA/spark-rapids/pull/11170)|Avoid listFiles or inputFiles on relations with static partitioning| -|[#11159](https://github.com/NVIDIA/spark-rapids/pull/11159)|Drop spark31x shims| -|[#10951](https://github.com/NVIDIA/spark-rapids/pull/10951)|Case when performance improvement: reduce the `copy_if_else`| -|[#11165](https://github.com/NVIDIA/spark-rapids/pull/11165)|Fix some GpuBroadcastToRowExec by not dropping columns| -|[#11126](https://github.com/NVIDIA/spark-rapids/pull/11126)|Coalesce batches after a logical coalesce operation| -|[#11164](https://github.com/NVIDIA/spark-rapids/pull/11164)|fix the bucketed write error for non-utc cases| -|[#11132](https://github.com/NVIDIA/spark-rapids/pull/11132)|Add deletion vector metrics for low shuffle merge.| -|[#11156](https://github.com/NVIDIA/spark-rapids/pull/11156)|Fix batch splitting for partition column size on row-count-only batches| -|[#11153](https://github.com/NVIDIA/spark-rapids/pull/11153)|Fix LORE dump oom.| -|[#11102](https://github.com/NVIDIA/spark-rapids/pull/11102)|Fix ANSI mode failures in subquery_test.py| -|[#11151](https://github.com/NVIDIA/spark-rapids/pull/11151)|Fix the test error of the bucketed write for the non-utc case| -|[#11147](https://github.com/NVIDIA/spark-rapids/pull/11147)|upgrade ucx to 1.17.0| -|[#11138](https://github.com/NVIDIA/spark-rapids/pull/11138)|Update fastparquet to 2024.5.0 for numpy2 compatibility| -|[#11137](https://github.com/NVIDIA/spark-rapids/pull/11137)|Handle the change for UnaryPositive now extending RuntimeReplaceable| -|[#11094](https://github.com/NVIDIA/spark-rapids/pull/11094)|Add `HiveHash` support on GPU| -|[#11139](https://github.com/NVIDIA/spark-rapids/pull/11139)|Improve MetricsSuite to allow more gc jitter| 
-|[#11133](https://github.com/NVIDIA/spark-rapids/pull/11133)|Fix `test_window_group_limits_fallback`| -|[#11097](https://github.com/NVIDIA/spark-rapids/pull/11097)|Fix miscellaneous integ tests for Spark 4| -|[#11118](https://github.com/NVIDIA/spark-rapids/pull/11118)|Fix issue with DPP and AQE on reused broadcast exchanges| -|[#11043](https://github.com/NVIDIA/spark-rapids/pull/11043)|Dataproc serverless test fixes| -|[#10965](https://github.com/NVIDIA/spark-rapids/pull/10965)|Profiler: Disable collecting async allocation events by default| -|[#11117](https://github.com/NVIDIA/spark-rapids/pull/11117)|Update Scala2.13 premerge CI against JDK17| -|[#11084](https://github.com/NVIDIA/spark-rapids/pull/11084)|Introduce LORE framework.| -|[#11099](https://github.com/NVIDIA/spark-rapids/pull/11099)|Spark 4: Handle ANSI mode in sort_test.py| -|[#11115](https://github.com/NVIDIA/spark-rapids/pull/11115)|Fix match error in RapidsShuffleIterator.scala [scala2.13]| -|[#11088](https://github.com/NVIDIA/spark-rapids/pull/11088)|Support regex patterns with brackets when rewriting to PrefixRange pattern in rlike.| -|[#10950](https://github.com/NVIDIA/spark-rapids/pull/10950)|Add a heuristic to skip second or third agg pass| -|[#11048](https://github.com/NVIDIA/spark-rapids/pull/11048)|Fixed array_tests for Spark 4.0.0| -|[#11049](https://github.com/NVIDIA/spark-rapids/pull/11049)|Fix some cast_tests for Spark 4.0.0| -|[#11066](https://github.com/NVIDIA/spark-rapids/pull/11066)|Replaced spark3xx-common references to spark-shared| -|[#11083](https://github.com/NVIDIA/spark-rapids/pull/11083)|Exclude a case based on JDK version in Spark UT| -|[#10997](https://github.com/NVIDIA/spark-rapids/pull/10997)|Fix some test issues in Spark UT and keep RapidsTestSettings update-to-date| -|[#11073](https://github.com/NVIDIA/spark-rapids/pull/11073)|Disable ANSI mode for window function tests| -|[#11076](https://github.com/NVIDIA/spark-rapids/pull/11076)|Improve the diagnostics for 'conv' fallback explain| -|[#11092](https://github.com/NVIDIA/spark-rapids/pull/11092)|Add GpuBucketingUtils shim to Spark 4.0.0| -|[#11062](https://github.com/NVIDIA/spark-rapids/pull/11062)|fix duplicate counted metrics like op time for GpuCoalesceBatches| -|[#11044](https://github.com/NVIDIA/spark-rapids/pull/11044)|Fixed Failing tests in arithmetic_ops_tests for Spark 4.0.0| -|[#11086](https://github.com/NVIDIA/spark-rapids/pull/11086)|upgrade blossom-ci actions version [skip ci]| -|[#10957](https://github.com/NVIDIA/spark-rapids/pull/10957)|Support bucketing write for GPU| -|[#10979](https://github.com/NVIDIA/spark-rapids/pull/10979)|[FEA] Introduce low shuffle merge.| -|[#10996](https://github.com/NVIDIA/spark-rapids/pull/10996)|Fallback non-UTC TimeZoneAwareExpression with zoneId| -|[#11072](https://github.com/NVIDIA/spark-rapids/pull/11072)|Workaround numpy2 failed fastparquet compatibility tests| -|[#11046](https://github.com/NVIDIA/spark-rapids/pull/11046)|Calculate parallelism to speed up pre-merge CI| -|[#11054](https://github.com/NVIDIA/spark-rapids/pull/11054)|fix flaky array_item test failures| -|[#11051](https://github.com/NVIDIA/spark-rapids/pull/11051)|[FEA] Increase parallelism of deltalake test on databricks| -|[#10993](https://github.com/NVIDIA/spark-rapids/pull/10993)|`binary-dedupe` changes for Spark 4.0.0| -|[#11060](https://github.com/NVIDIA/spark-rapids/pull/11060)|Add in the ability to fingerprint JSON columns| -|[#11059](https://github.com/NVIDIA/spark-rapids/pull/11059)|Revert "Add in the ability to fingerprint 
JSON columns (#11002)" [skip ci]| -|[#11039](https://github.com/NVIDIA/spark-rapids/pull/11039)|Concat() Exception bug fix| -|[#11002](https://github.com/NVIDIA/spark-rapids/pull/11002)|Add in the ability to fingerprint JSON columns| -|[#10977](https://github.com/NVIDIA/spark-rapids/pull/10977)|Rewrite multiple literal choice regex to multiple contains in rlike| -|[#11035](https://github.com/NVIDIA/spark-rapids/pull/11035)|Fix auto merge conflict 11034 [skip ci]| -|[#11040](https://github.com/NVIDIA/spark-rapids/pull/11040)|Append new authorized user to blossom-ci whitelist [skip ci]| -|[#11036](https://github.com/NVIDIA/spark-rapids/pull/11036)|Update blossom-ci ACL to secure format [skip ci]| -|[#11032](https://github.com/NVIDIA/spark-rapids/pull/11032)|Fix a hive write test failure for Spark 350| -|[#10998](https://github.com/NVIDIA/spark-rapids/pull/10998)|Improve log to print more lines in build [skip ci]| -|[#10992](https://github.com/NVIDIA/spark-rapids/pull/10992)|Addressing the Named Parameter change in Spark 4.0.0| -|[#10943](https://github.com/NVIDIA/spark-rapids/pull/10943)|Fix Spark UT issues in RapidsDataFrameAggregateSuite| -|[#10963](https://github.com/NVIDIA/spark-rapids/pull/10963)|Add rapids configs to enable GPU running in Spark UT| -|[#10978](https://github.com/NVIDIA/spark-rapids/pull/10978)|More compilation fixes for Spark 4.0.0| -|[#10953](https://github.com/NVIDIA/spark-rapids/pull/10953)|Speed up the integration tests by running them in parallel on the Databricks cluster| -|[#10958](https://github.com/NVIDIA/spark-rapids/pull/10958)|Fix a hive write test failure| -|[#10970](https://github.com/NVIDIA/spark-rapids/pull/10970)|Move Support for `RaiseError` to a Shim Excluding Spark 4.0.0| -|[#10966](https://github.com/NVIDIA/spark-rapids/pull/10966)|Add default value for REF of premerge jenkinsfile to avoid bad overwritten [skip ci]| -|[#10959](https://github.com/NVIDIA/spark-rapids/pull/10959)|Add new ID to blossom-ci allow list [skip ci]| -|[#10952](https://github.com/NVIDIA/spark-rapids/pull/10952)|Add shims to take care of the signature change for writeUDFs in PythonUDFRunner| -|[#10931](https://github.com/NVIDIA/spark-rapids/pull/10931)|Add Support for Renaming of PythonMapInArrow| -|[#10949](https://github.com/NVIDIA/spark-rapids/pull/10949)|Change dependency version to 24.08.0-SNAPSHOT| -|[#10857](https://github.com/NVIDIA/spark-rapids/pull/10857)|[Spark 4.0] Account for `PartitionedFileUtil.splitFiles` signature change.| -|[#10912](https://github.com/NVIDIA/spark-rapids/pull/10912)|GpuInsertIntoHiveTable supports parquet format| -|[#10863](https://github.com/NVIDIA/spark-rapids/pull/10863)|[Spark 4.0] Account for `CommandUtils.uncacheTableOrView` signature change.| -|[#10944](https://github.com/NVIDIA/spark-rapids/pull/10944)|Added Shim for BatchScanExec to Support Spark 4.0| -|[#10946](https://github.com/NVIDIA/spark-rapids/pull/10946)|Unarchive Spark test jar for spark.read(ability)| -|[#10945](https://github.com/NVIDIA/spark-rapids/pull/10945)|Add Support for Multiple Filtering Keys for Subquery Broadcast| -|[#10871](https://github.com/NVIDIA/spark-rapids/pull/10871)|Add classloader diagnostics to initShuffleManager error message| -|[#10933](https://github.com/NVIDIA/spark-rapids/pull/10933)|Fixed Databricks build| -|[#10929](https://github.com/NVIDIA/spark-rapids/pull/10929)|Append new authorized user to blossom-ci whitelist [skip ci]| ## Older Releases Changelog of older releases can be found at [docs/archives](/docs/archives) diff --git 
a/CONTRIBUTING.md b/CONTRIBUTING.md
index f18fa2ba0cb..21f31ba1498 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -127,15 +127,15 @@ mvn -pl dist -PnoSnapshots package -DskipTests
 Verify that shim-specific classes are hidden from a conventional classloader.
 
 ```bash
-$ javap -cp dist/target/rapids-4-spark_2.12-24.10.1-cuda11.jar com.nvidia.spark.rapids.shims.SparkShimImpl
+$ javap -cp dist/target/rapids-4-spark_2.12-24.12.0-cuda11.jar com.nvidia.spark.rapids.shims.SparkShimImpl
 Error: class not found: com.nvidia.spark.rapids.shims.SparkShimImpl
 ```
 
 However, its bytecode can be loaded if prefixed with `spark3XY` not contained in the package name
 
 ```bash
-$ javap -cp dist/target/rapids-4-spark_2.12-24.10.1-cuda11.jar spark320.com.nvidia.spark.rapids.shims.SparkShimImpl | head -2
-Warning: File dist/target/rapids-4-spark_2.12-24.10.1-cuda11.jar(/spark320/com/nvidia/spark/rapids/shims/SparkShimImpl.class) does not contain class spark320.com.nvidia.spark.rapids.shims.SparkShimImpl
+$ javap -cp dist/target/rapids-4-spark_2.12-24.12.0-cuda11.jar spark320.com.nvidia.spark.rapids.shims.SparkShimImpl | head -2
+Warning: File dist/target/rapids-4-spark_2.12-24.12.0-cuda11.jar(/spark320/com/nvidia/spark/rapids/shims/SparkShimImpl.class) does not contain class spark320.com.nvidia.spark.rapids.shims.SparkShimImpl
 Compiled from "SparkShims.scala"
 public final class com.nvidia.spark.rapids.shims.SparkShimImpl {
 ```
@@ -178,7 +178,7 @@ mvn package -pl dist -am -Dbuildver=340 -DallowConventionalDistJar=true
 Verify `com.nvidia.spark.rapids.shims.SparkShimImpl` is conventionally loadable:
 
 ```bash
-$ javap -cp dist/target/rapids-4-spark_2.12-24.10.1-cuda11.jar com.nvidia.spark.rapids.shims.SparkShimImpl | head -2
+$ javap -cp dist/target/rapids-4-spark_2.12-24.12.0-cuda11.jar com.nvidia.spark.rapids.shims.SparkShimImpl | head -2
 Compiled from "SparkShims.scala"
 public final class com.nvidia.spark.rapids.shims.SparkShimImpl {
 ```
diff --git a/DF_UDF_README.md b/DF_UDF_README.md
new file mode 100644
index 00000000000..a669c87f258
--- /dev/null
+++ b/DF_UDF_README.md
@@ -0,0 +1,117 @@
+# Scala / Java UDFs implemented using data frames
+
+User Defined Functions (UDFs) are used for a number of reasons in Apache Spark. Much of the time it is to implement
+logic that is either very difficult or impossible to implement using existing SQL/Dataframe APIs directly. But they
+are also used as a way to standardize processing logic across an organization or for code reuse.
+
+But UDFs come with some downsides. The biggest one is visibility into the processing being done. SQL is a language that
+can be highly optimized. But a UDF in most cases is a black box that the SQL optimizer cannot do anything about.
+This can result in less than ideal query planning. Additionally, accelerated execution environments, like the
+RAPIDS Accelerator for Apache Spark, have no easy way to replace UDFs with accelerated versions, which can result in
+slow performance.
+
+This feature attempts to add visibility to the code reuse use case by providing a way to implement a UDF in terms of
+dataframe commands.
+
+## Setup
+
+The dataframe UDF plugin is packaged in the same jar as the RAPIDS Accelerator for Apache Spark. This jar will need to
+be added as a compile time dependency for code that wants to use this feature, as well as added to your Spark
+classpath just like you would do for GPU acceleration.
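+
+For example, with Maven the compile-time dependency might look like the sketch below. This mirrors the artifact
+coordinates shown in this repository's README; the Scala binary version (`2.12`), the release version (`24.12.0`),
+and the `provided` scope are assumptions to adjust for your own build.
+
+```xml
+<dependency>
+    <groupId>com.nvidia</groupId>
+    <artifactId>rapids-4-spark_2.12</artifactId>
+    <version>24.12.0</version>
+    <scope>provided</scope>
+</dependency>
+```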
+
+If you do not plan to use the GPU accelerated processing, but still want dataframe UDF support in CPU-only
+applications, then add `com.nvidia.spark.DFUDFPlugin` to the `spark.sql.extensions` config. If you do use GPU
+accelerated processing, the RAPIDS Plugin will enable this automatically. You don't need to set the
+`spark.sql.extensions` config in that case, but it won't hurt anything if you do add it. Now you can implement a UDF
+in terms of Dataframe operations.
+
+## Usage
+
+```scala
+import com.nvidia.spark.functions._
+
+import org.apache.spark.sql.Column
+import org.apache.spark.sql.functions._
+
+val sum_array = df_udf((longArray: Column) =>
+  aggregate(longArray,
+    lit(0L),
+    (a, b) => coalesce(a, lit(0L)) + coalesce(b, lit(0L)),
+    a => a))
+spark.udf.register("sum_array", sum_array)
+```
+
+You can then use `sum_array` however you would have used any other UDF. This allows you to provide a drop-in
+replacement implementation of an existing UDF.
+
+```scala
+Seq(Array(1L, 2L, 3L)).toDF("data").selectExpr("sum_array(data) as result").show()
+
++------+
+|result|
++------+
+|     6|
++------+
+```
+
+Java APIs are also supported and should work the same way as Spark's own UDFs.
+
+```java
+import static com.nvidia.spark.functions.df_udf;
+
+import org.apache.spark.sql.*;
+import org.apache.spark.sql.api.java.UDF2;
+import org.apache.spark.sql.expressions.UserDefinedFunction;
+
+
+UserDefinedFunction myAdd = df_udf((Column lhs, Column rhs) -> lhs.plus(rhs));
+spark.udf().register("myadd", myAdd);
+
+spark.sql("SELECT myadd(1, 1) as r").show();
+// +--+
+// | r|
+// +--+
+// | 2|
+// +--+
+
+```
+
+## Type Checks
+
+DataFrame APIs do not provide type safety when writing the code, and that is the same here. There are no built-in type
+checks for inputs yet. Also, because of how types are resolved in Spark, there is no way to adjust the query based on
+the types passed in. Type checks are handled by the SQL planner/optimizer after the UDF has been replaced. This means
+that the final SQL will not violate any type safety, but it also means that the errors might be confusing. For example,
+if I passed in an `ARRAY<DOUBLE>` to `sum_array` instead of an `ARRAY<LONG>` I would get an error like
+
+```scala
+Seq(Array(1.0, 2.0, 3.0)).toDF("data").selectExpr("sum_array(data) as result").show()
+org.apache.spark.sql.AnalysisException: [DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE] Cannot resolve "aggregate(data, 0, lambdafunction((coalesce(namedlambdavariable(), 0) + coalesce(namedlambdavariable(), 0)), namedlambdavariable(), namedlambdavariable()), lambdafunction(namedlambdavariable(), namedlambdavariable()))" due to data type mismatch: Parameter 3 requires the "BIGINT" type, however "lambdafunction((coalesce(namedlambdavariable(), 0) + coalesce(namedlambdavariable(), 0)), namedlambdavariable(), namedlambdavariable())" has the type "DOUBLE".; line 1 pos 0;
+Project [aggregate(data#46, 0, lambdafunction((cast(coalesce(lambda x_9#49L, 0) as double) + coalesce(lambda y_10#50, cast(0 as double))), lambda x_9#49L, lambda y_10#50, false), lambdafunction(lambda x_11#51L, lambda x_11#51L, false)) AS result#48L]
++- Project [value#43 AS data#46]
+   +- LocalRelation [value#43]
+
+  at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.dataTypeMismatch(package.scala:73)
+  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$5(CheckAnalysis.scala:269)
+  at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis0$5$adapted(CheckAnalysis.scala:256)
+```
+
+Which is not as simple to understand as a normal UDF.
+
+ +```scala +val sum_array = udf((a: Array[Long]) => a.sum) + +spark.udf.register("sum_array", sum_array) + +Seq(Array(1.0, 2.0, 3.0)).toDF("data").selectExpr("sum_array(data) as result").show() +org.apache.spark.sql.AnalysisException: [CANNOT_UP_CAST_DATATYPE] Cannot up cast array element from "DOUBLE" to "BIGINT". + The type path of the target object is: +- array element class: "long" +- root class: "[J" +You can either add an explicit cast to the input data or choose a higher precision type of the field in the target object +at org.apache.spark.sql.errors.QueryCompilationErrors$.upCastFailureError(QueryCompilationErrors.scala:285) +at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveUpCast$.org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveUpCast$$fail(Analyzer.scala:3646) +at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveUpCast$$anonfun$apply$57$$anonfun$applyOrElse$234.applyOrElse(Analyzer.scala:3677) +at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveUpCast$$anonfun$apply$57$$anonfun$applyOrElse$234.applyOrElse(Analyzer.scala:3654) +``` + +We hope to add optional type checks in the future. diff --git a/README.md b/README.md index 01e2076bdf8..65e194de3c2 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ as a `provided` dependency. com.nvidia rapids-4-spark_2.12 - 24.10.1 + 24.12.0 provided ``` diff --git a/aggregator/pom.xml b/aggregator/pom.xml index a5b47a827d5..1ba28e86568 100644 --- a/aggregator/pom.xml +++ b/aggregator/pom.xml @@ -22,13 +22,13 @@ com.nvidia rapids-4-spark-jdk-profiles_2.12 - 24.10.1 + 24.12.0 ../jdk-profiles/pom.xml rapids-4-spark-aggregator_2.12 RAPIDS Accelerator for Apache Spark Aggregator Creates an aggregated shaded package of the RAPIDS plugin for Apache Spark - 24.10.1 + 24.12.0 aggregator @@ -71,6 +71,28 @@ ${spark-rapids-private.version} ${spark.version.classifier} + + + com.nvidia + ${rapids.delta.artifactId1}_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + + + com.nvidia + ${rapids.delta.artifactId2}_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + + + com.nvidia + ${rapids.delta.artifactId3}_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + @@ -262,507 +284,4 @@ - - - - release320 - - - true - - - buildver - 320 - - - - - com.nvidia - rapids-4-spark-delta-20x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release321 - - - buildver - 321 - - - - - com.nvidia - rapids-4-spark-delta-20x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release321cdh - - - buildver - 321cdh - - - - - com.nvidia - rapids-4-spark-delta-20x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release322 - - - buildver - 322 - - - - - com.nvidia - rapids-4-spark-delta-20x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release323 - - - buildver - 323 - - - - - com.nvidia - rapids-4-spark-delta-20x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release324 - - - buildver - 324 - - - - - com.nvidia - rapids-4-spark-delta-20x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release330 - - - - buildver - 330 - - - - - com.nvidia - rapids-4-spark-delta-21x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-22x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - 
com.nvidia - rapids-4-spark-delta-23x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release330cdh - - - buildver - 330cdh - - - - - com.nvidia - rapids-4-spark-delta-21x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-22x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-23x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release332cdh - - - buildver - 332cdh - - - - - com.nvidia - rapids-4-spark-delta-21x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-22x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-23x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release330db - - - buildver - 330db - - - - - com.nvidia - rapids-4-spark-delta-spark330db_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release331 - - - buildver - 331 - - - - - com.nvidia - rapids-4-spark-delta-21x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-22x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-23x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release332 - - - buildver - 332 - - - - - com.nvidia - rapids-4-spark-delta-21x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-22x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-23x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release332db - - - buildver - 332db - - - - - com.nvidia - rapids-4-spark-delta-spark332db_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release341db - - - buildver - 341db - - - - - com.nvidia - rapids-4-spark-delta-spark341db_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release333 - - - buildver - 333 - - - - - com.nvidia - rapids-4-spark-delta-21x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-22x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-23x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release334 - - - buildver - 334 - - - - - com.nvidia - rapids-4-spark-delta-21x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-22x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-23x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release340 - - - buildver - 340 - - - - - com.nvidia - rapids-4-spark-delta-24x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release341 - - - buildver - 341 - - - - - com.nvidia - rapids-4-spark-delta-24x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release342 - - - buildver - 342 - - - - - com.nvidia - rapids-4-spark-delta-24x_${scala.binary.version} - ${project.version} - 
${spark.version.classifier} - - - - - release343 - - - buildver - 343 - - - - - com.nvidia - rapids-4-spark-delta-24x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release350 - - - buildver - 350 - - - - - com.nvidia - rapids-4-spark-delta-stub_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release351 - - - buildver - 351 - - - - - com.nvidia - rapids-4-spark-delta-stub_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release352 - - - buildver - 352 - - - - - com.nvidia - rapids-4-spark-delta-stub_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - diff --git a/api_validation/pom.xml b/api_validation/pom.xml index 653f2def0bc..697fb4c7759 100644 --- a/api_validation/pom.xml +++ b/api_validation/pom.xml @@ -22,11 +22,11 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.12 - 24.10.1 + 24.12.0 ../shim-deps/pom.xml rapids-4-spark-api-validation_2.12 - 24.10.1 + 24.12.0 api_validation diff --git a/build/buildall b/build/buildall index 0599d080054..2da61fe6451 100755 --- a/build/buildall +++ b/build/buildall @@ -86,7 +86,7 @@ function bloopInstall() { function versionsFromDistProfile() { [[ "$BUILD_ALL_DEBUG" == "1" ]] && set -x - versionRawStr=$(mvn -B help:evaluate -q -pl dist -P"$1" -Dexpression=included_buildvers -DforceStdout) + versionRawStr=$($MVN -B help:evaluate -q -pl dist -P"$1" -Dexpression=included_buildvers -DforceStdout) versionStr=${versionRawStr//[$'\n',]/} echo -n $versionStr } @@ -171,6 +171,7 @@ fi export MVN="mvn -Dmaven.wagon.http.retryHandler.count=3 ${MVN_OPT}" if [[ "$SCALA213" == "1" ]]; then + MVN="$MVN -f scala2.13/" DIST_PROFILE=${DIST_PROFILE:-"noSnapshotsScala213"} $(dirname $0)/make-scala-version-build-files.sh 2.13 else @@ -234,10 +235,6 @@ if [[ "$SKIP_CLEAN" != "1" ]]; then $MVN -q clean fi -if [[ "$SCALA213" == "1" ]]; then - cd scala2.13 -fi - echo "Building a combined dist jar with Shims for ${SPARK_SHIM_VERSIONS[@]} ..." function build_single_shim() { diff --git a/build/get_buildvers.py b/build/get_buildvers.py index bfce9656054..5fe864670b5 100644 --- a/build/get_buildvers.py +++ b/build/get_buildvers.py @@ -34,7 +34,7 @@ def _get_buildvers(buildvers, pom_file, logger=None): else: no_snapshots.append(release) excluded_shims = pom.find(".//pom:dyn.shim.excluded.releases", ns) - if excluded_shims: + if excluded_shims is not None: for removed_shim in [x.strip() for x in excluded_shims.text.split(",")]: if removed_shim in snapshots: snapshots.remove(removed_shim) @@ -48,8 +48,8 @@ def _get_buildvers(buildvers, pom_file, logger=None): if "scala2.13" in pom_file: no_snapshots = list(filter(lambda x: not x.endswith("cdh"), no_snapshots)) - db_release = list(filter(lambda x: x.endswith("db"), no_snapshots)) - no_snapshots = list(filter(lambda x: not x.endswith("db"), no_snapshots)) + db_release = list(filter(lambda x: "db" in x, no_snapshots)) + no_snapshots = list(filter(lambda x: "db" not in x, no_snapshots)) snap_and_no_snap = no_snapshots + snapshots snap_and_no_snap_with_db = snap_and_no_snap + db_release no_snap_with_db = no_snapshots + db_release diff --git a/build/make-scala-version-build-files.sh b/build/make-scala-version-build-files.sh index ad3482ee979..ae6ae016e4e 100755 --- a/build/make-scala-version-build-files.sh +++ b/build/make-scala-version-build-files.sh @@ -18,6 +18,20 @@ set -e +trap_func() { + rv=$? 
+ if [[ $rv == 0 ]]; then + echo DONE scala2.13 poms generated: exit code = $rv + else + echo ERROR generating scala2.13 poms, re-execute with: + echo " bash -x $*" + echo to inspect the error output + exit $rv + fi +} + +trap "trap_func" EXIT + VALID_VERSIONS=( 2.13 ) declare -A DEFAULT_SPARK DEFAULT_SPARK[2.12]="spark320" diff --git a/build/shimplify.py b/build/shimplify.py index a942f9a05b9..d551d2f3bbd 100644 --- a/build/shimplify.py +++ b/build/shimplify.py @@ -84,6 +84,7 @@ import os import re import subprocess +from functools import partial def __project(): @@ -199,7 +200,9 @@ def __csv_as_arr(str_val): __shim_comment_pattern = re.compile(re.escape(__opening_shim_tag) + r'\n(.*)\n' + re.escape(__closing_shim_tag), re.DOTALL) - +__spark_version_classifier = '$_spark.version.classifier_' +__spark_version_placeholder = re.escape(__spark_version_classifier) +__package_pattern = re.compile('package .*' + '(' + __spark_version_placeholder + ')') def __upsert_shim_json(filename, bv_list): with open(filename, 'r') as file: contents = file.readlines() @@ -365,10 +368,7 @@ def __generate_symlinks(): __log.info("# generating symlinks for shim %s %s files", buildver, src_type) __traverse_source_tree_of_all_shims( src_type, - lambda src_type, path, build_ver_arr: __generate_symlink_to_file(buildver, - src_type, - path, - build_ver_arr)) + partial(__generate_symlink_to_file, buildver=buildver, src_type=src_type)) def __traverse_source_tree_of_all_shims(src_type, func): """Walks src//sparkXYZ""" @@ -392,11 +392,10 @@ def __traverse_source_tree_of_all_shims(src_type, func): build_ver_arr = map(lambda x: str(json.loads(x).get('spark')), shim_arr) __log.debug("extracted shims %s", build_ver_arr) assert build_ver_arr == sorted(build_ver_arr),\ - "%s shim list is not properly sorted" % shim_file_path - func(src_type, shim_file_path, build_ver_arr) - + "%s shim list is not properly sorted: %s" % (shim_file_path, build_ver_arr) + func(shim_file_path=shim_file_path, build_ver_arr=build_ver_arr, shim_file_txt=shim_file_txt) -def __generate_symlink_to_file(buildver, src_type, shim_file_path, build_ver_arr): +def __generate_symlink_to_file(buildver, src_type, shim_file_path, build_ver_arr, shim_file_txt): if buildver in build_ver_arr: project_base_dir = str(__project().getBaseDir()) base_dir = __src_basedir @@ -416,9 +415,32 @@ def __generate_symlink_to_file(buildver, src_type, shim_file_path, build_ver_arr target_shim_file_path = os.path.join(target_root, target_rel_path) __log.debug("creating symlink %s -> %s", target_shim_file_path, shim_file_path) __makedirs(os.path.dirname(target_shim_file_path)) - if __should_overwrite: + package_match = __package_pattern.search(shim_file_txt) + if __should_overwrite or package_match: __remove_file(target_shim_file_path) - __symlink(shim_file_path, target_shim_file_path) + if package_match: + with open(target_shim_file_path, mode='w') as f: + f.write(shim_file_txt[0:package_match.start(1)]) + f.write("spark") + f.write(buildver) + f.write('\n') + f.write(''' +/* +!!! DO NOT EDIT THIS FILE !!! 
+ +This file has been generated from the original + +%s + +by interpolating $_spark.version.classifier_=%s + +Be sure to edit the original file if required + +*/ + ''' % (shim_file_path, 'spark' + buildver)) + f.write(shim_file_txt[package_match.end(1):]) + else: + __symlink(shim_file_path, target_shim_file_path) def __symlink(src, target): @@ -456,8 +478,7 @@ def __shimplify_layout(): for src_type in ['main', 'test']: __traverse_source_tree_of_all_shims( src_type, - lambda unused_src_type, shim_file_path, build_ver_arr: - __update_files2bv(files2bv, shim_file_path, build_ver_arr)) + partial(__update_files2bv, files2bv=files2bv)) # adding a new shim? if __add_shim_buildver is not None: @@ -486,11 +507,17 @@ def __shimplify_layout(): __git_rename_or_copy(shim_file, owner_shim) -def __update_files2bv(files2bv, path, buildver_arr): - assert path not in files2bv.keys(), "new path %s %s should be "\ - "encountered only once, current map\n%s" % (path, buildver_arr, files2bv) - __log.debug("Adding %s %s to files to shim map", path, buildver_arr) - files2bv[path] = buildver_arr +def __update_files2bv(files2bv, + # TODO an anachronism requirement: that the following two params + # have the same name along generate_symlink_file + shim_file_path, + build_ver_arr, + # + **kwargs): + assert shim_file_path not in files2bv.keys(), "new path %s %s should be "\ + "encountered only once, current map\n%s" % (shim_file_path, build_ver_arr, files2bv) + __log.debug("Adding %s %s to files to shim map", shim_file_path, build_ver_arr) + files2bv[shim_file_path] = build_ver_arr def __add_new_shim_to_file_map(files2bv): diff --git a/datagen/README.md b/datagen/README.md index 2855bbdd8b4..022cc2f1eba 100644 --- a/datagen/README.md +++ b/datagen/README.md @@ -24,12 +24,12 @@ Where `$SPARK_VERSION` is a compressed version number, like 330 for Spark 3.3.0. After this the jar should be at `target/datagen_2.12-$PLUGIN_VERSION-spark$SPARK_VERSION.jar` -for example a Spark 3.3.0 jar for the 24.10.1 release would be -`target/datagen_2.12-24.10.1-spark330.jar` +for example a Spark 3.3.0 jar for the 24.12.0 release would be +`target/datagen_2.12-24.12.0-spark330.jar` To get a spark shell with this you can run ```shell -spark-shell --jars target/datagen_2.12-24.10.1-spark330.jar +spark-shell --jars target/datagen_2.12-24.12.0-spark330.jar ``` After that you should be good to go. 
diff --git a/datagen/ScaleTest.md b/datagen/ScaleTest.md index 63d2bc3b82a..bb5c4a1c988 100644 --- a/datagen/ScaleTest.md +++ b/datagen/ScaleTest.md @@ -44,7 +44,7 @@ $SPARK_HOME/bin/spark-submit \ --conf spark.sql.parquet.datetimeRebaseModeInWrite=CORRECTED \ --class com.nvidia.rapids.tests.scaletest.ScaleTestDataGen \ # the main class --jars $SPARK_HOME/examples/jars/scopt_2.12-3.7.1.jar \ # one dependency jar just shipped with Spark under $SPARK_HOME -./target/datagen_2.12-24.10.1-spark332.jar \ +./target/datagen_2.12-24.12.0-spark332.jar \ 1 \ 10 \ parquet \ diff --git a/datagen/pom.xml b/datagen/pom.xml index b22db7444c6..64e48ff9c85 100644 --- a/datagen/pom.xml +++ b/datagen/pom.xml @@ -21,18 +21,19 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.12 - 24.10.1 + 24.12.0 ../shim-deps/pom.xml datagen_2.12 Data Generator Tools for generating large amounts of data - 24.10.1 + 24.12.0 datagen **/* package + ${project.build.outputDirectory}/datagen-version-info.properties diff --git a/datagen/src/main/spark320/scala/org/apache/spark/sql/tests/datagen/DataGenExprShims.scala b/datagen/src/main/spark320/scala/org/apache/spark/sql/tests/datagen/DataGenExprShims.scala index 9134505c2f2..5ae88606cfb 100644 --- a/datagen/src/main/spark320/scala/org/apache/spark/sql/tests/datagen/DataGenExprShims.scala +++ b/datagen/src/main/spark320/scala/org/apache/spark/sql/tests/datagen/DataGenExprShims.scala @@ -35,9 +35,12 @@ {"spark": "341db"} {"spark": "342"} {"spark": "343"} +{"spark": "344"} {"spark": "350"} +{"spark": "350db143"} {"spark": "351"} {"spark": "352"} +{"spark": "353"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.tests.datagen diff --git a/delta-lake/common/src/main/databricks/scala/com/databricks/sql/transaction/tahoe/rapids/GpuDeltaLog.scala b/delta-lake/common/src/main/databricks/scala/com/databricks/sql/transaction/tahoe/rapids/GpuDeltaLog.scala index 2927b8607ad..58bcde1a855 100644 --- a/delta-lake/common/src/main/databricks/scala/com/databricks/sql/transaction/tahoe/rapids/GpuDeltaLog.scala +++ b/delta-lake/common/src/main/databricks/scala/com/databricks/sql/transaction/tahoe/rapids/GpuDeltaLog.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -68,7 +68,7 @@ object GpuDeltaLog { dataPath: String, options: Map[String, String], rapidsConf: RapidsConf): GpuDeltaLog = { - val deltaLog = DeltaLog.forTable(spark, dataPath, options) + val deltaLog = DeltaLog.forTable(spark, new Path(dataPath), options) new GpuDeltaLog(deltaLog, rapidsConf) } diff --git a/delta-lake/common/src/main/databricks/scala/com/nvidia/spark/rapids/delta/DatabricksDeltaProviderBase.scala b/delta-lake/common/src/main/databricks/scala/com/nvidia/spark/rapids/delta/DatabricksDeltaProviderBase.scala index 55f9cc2ae49..508b641d890 100644 --- a/delta-lake/common/src/main/databricks/scala/com/nvidia/spark/rapids/delta/DatabricksDeltaProviderBase.scala +++ b/delta-lake/common/src/main/databricks/scala/com/nvidia/spark/rapids/delta/DatabricksDeltaProviderBase.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -337,7 +337,7 @@ class DeltaCreatableRelationProviderMeta( } val path = saveCmd.options.get("path") if (path.isDefined) { - val deltaLog = DeltaLog.forTable(SparkSession.active, path.get, saveCmd.options) + val deltaLog = DeltaLog.forTable(SparkSession.active, new Path(path.get), saveCmd.options) RapidsDeltaUtils.tagForDeltaWrite(this, saveCmd.query.schema, Some(deltaLog), saveCmd.options, SparkSession.active) } else { @@ -346,4 +346,4 @@ class DeltaCreatableRelationProviderMeta( } override def convertToGpu(): GpuCreatableRelationProvider = new GpuDeltaDataSource(conf) -} \ No newline at end of file +} diff --git a/delta-lake/common/src/main/databricks/scala/org/apache/spark/sql/rapids/delta/GpuOptimizeWriteExchangeExec.scala b/delta-lake/common/src/main/databricks/scala/org/apache/spark/sql/rapids/delta/GpuOptimizeWriteExchangeExec.scala index 1a9936ea808..0c212d6842a 100644 --- a/delta-lake/common/src/main/databricks/scala/org/apache/spark/sql/rapids/delta/GpuOptimizeWriteExchangeExec.scala +++ b/delta-lake/common/src/main/databricks/scala/org/apache/spark/sql/rapids/delta/GpuOptimizeWriteExchangeExec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * This file was derived from OptimizeWriteExchange.scala * in the Delta Lake project at https://github.com/delta-io/delta @@ -26,7 +26,7 @@ import scala.concurrent.Future import scala.concurrent.duration.Duration import com.databricks.sql.transaction.tahoe.sources.DeltaSQLConf -import com.nvidia.spark.rapids.{GpuColumnarBatchSerializer, GpuExec, GpuMetric, GpuPartitioning, GpuRoundRobinPartitioning} +import com.nvidia.spark.rapids.{GpuColumnarBatchSerializer, GpuExec, GpuMetric, GpuPartitioning, GpuRoundRobinPartitioning, RapidsConf} import com.nvidia.spark.rapids.delta.RapidsDeltaSQLConf import org.apache.spark.{MapOutputStatistics, ShuffleDependency} @@ -98,7 +98,9 @@ case class GpuOptimizeWriteExchangeExec( } private lazy val serializer: Serializer = - new GpuColumnarBatchSerializer(gpuLongMetric("dataSize")) + new GpuColumnarBatchSerializer(gpuLongMetric("dataSize"), + child.output.map(_.dataType).toArray, + RapidsConf.SHUFFLE_KUDO_SERIALIZER_ENABLED.get(child.conf)) @transient lazy val inputRDD: RDD[ColumnarBatch] = child.executeColumnar() diff --git a/delta-lake/delta-20x/pom.xml b/delta-lake/delta-20x/pom.xml index ce78cd3747a..a07d81e0f25 100644 --- a/delta-lake/delta-20x/pom.xml +++ b/delta-lake/delta-20x/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.12 - 24.10.1 + 24.12.0 ../../jdk-profiles/pom.xml rapids-4-spark-delta-20x_2.12 RAPIDS Accelerator for Apache Spark Delta Lake 2.0.x Support Delta Lake 2.0.x support for the RAPIDS Accelerator for Apache Spark - 24.10.1 + 24.12.0 ../delta-lake/delta-20x diff --git a/delta-lake/delta-21x/pom.xml b/delta-lake/delta-21x/pom.xml index b87f8e3107a..3ad3e3c83fc 100644 --- a/delta-lake/delta-21x/pom.xml +++ b/delta-lake/delta-21x/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.12 - 24.10.1 + 24.12.0 ../../jdk-profiles/pom.xml rapids-4-spark-delta-21x_2.12 RAPIDS Accelerator for Apache Spark Delta Lake 2.1.x Support Delta Lake 2.1.x support for the RAPIDS Accelerator for Apache Spark - 24.10.1 + 24.12.0 ../delta-lake/delta-21x diff --git a/delta-lake/delta-22x/pom.xml b/delta-lake/delta-22x/pom.xml index dbf2d0316ca..5d4d389b097 100644 --- a/delta-lake/delta-22x/pom.xml +++ b/delta-lake/delta-22x/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.12 - 24.10.1 + 
24.12.0 ../../jdk-profiles/pom.xml rapids-4-spark-delta-22x_2.12 RAPIDS Accelerator for Apache Spark Delta Lake 2.2.x Support Delta Lake 2.2.x support for the RAPIDS Accelerator for Apache Spark - 24.10.1 + 24.12.0 ../delta-lake/delta-22x diff --git a/delta-lake/delta-23x/pom.xml b/delta-lake/delta-23x/pom.xml index 05d18e8d74c..ea394bd26b2 100644 --- a/delta-lake/delta-23x/pom.xml +++ b/delta-lake/delta-23x/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-parent_2.12 - 24.10.1 + 24.12.0 ../../pom.xml rapids-4-spark-delta-23x_2.12 RAPIDS Accelerator for Apache Spark Delta Lake 2.3.x Support Delta Lake 2.3.x support for the RAPIDS Accelerator for Apache Spark - 24.10.1 + 24.12.0 ../delta-lake/delta-23x diff --git a/delta-lake/delta-24x/pom.xml b/delta-lake/delta-24x/pom.xml index 6f03bd3af8c..ee1c7926245 100644 --- a/delta-lake/delta-24x/pom.xml +++ b/delta-lake/delta-24x/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.12 - 24.10.1 + 24.12.0 ../../jdk-profiles/pom.xml rapids-4-spark-delta-24x_2.12 RAPIDS Accelerator for Apache Spark Delta Lake 2.4.x Support Delta Lake 2.4.x support for the RAPIDS Accelerator for Apache Spark - 24.10.1 + 24.12.0 ../delta-lake/delta-24x diff --git a/delta-lake/delta-spark330db/pom.xml b/delta-lake/delta-spark330db/pom.xml index 54ea4dcda29..e5f60afb125 100644 --- a/delta-lake/delta-spark330db/pom.xml +++ b/delta-lake/delta-spark330db/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.12 - 24.10.1 + 24.12.0 ../../shim-deps/pom.xml rapids-4-spark-delta-spark330db_2.12 RAPIDS Accelerator for Apache Spark Databricks 11.3 Delta Lake Support Databricks 11.3 Delta Lake support for the RAPIDS Accelerator for Apache Spark - 24.10.1 + 24.12.0 ../delta-lake/delta-spark330db diff --git a/delta-lake/common/src/main/databricks/scala/com/databricks/sql/transaction/tahoe/rapids/GpuOptimisticTransactionBase.scala b/delta-lake/delta-spark330db/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuOptimisticTransactionBase.scala similarity index 100% rename from delta-lake/common/src/main/databricks/scala/com/databricks/sql/transaction/tahoe/rapids/GpuOptimisticTransactionBase.scala rename to delta-lake/delta-spark330db/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuOptimisticTransactionBase.scala diff --git a/delta-lake/delta-spark332db/pom.xml b/delta-lake/delta-spark332db/pom.xml index 66fd0ec856b..102c91daf82 100644 --- a/delta-lake/delta-spark332db/pom.xml +++ b/delta-lake/delta-spark332db/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.12 - 24.10.1 + 24.12.0 ../../shim-deps/pom.xml rapids-4-spark-delta-spark332db_2.12 RAPIDS Accelerator for Apache Spark Databricks 12.2 Delta Lake Support Databricks 12.2 Delta Lake support for the RAPIDS Accelerator for Apache Spark - 24.10.1 + 24.12.0 ../delta-lake/delta-spark332db diff --git a/delta-lake/delta-spark332db/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuOptimisticTransactionBase.scala b/delta-lake/delta-spark332db/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuOptimisticTransactionBase.scala new file mode 100644 index 00000000000..b6e9e11946d --- /dev/null +++ b/delta-lake/delta-spark332db/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuOptimisticTransactionBase.scala @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. 
+ * + * This file was derived from OptimisticTransaction.scala and TransactionalWrite.scala + * in the Delta Lake project at https://github.com/delta-io/delta. + * + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.databricks.sql.transaction.tahoe.rapids + +import com.databricks.sql.transaction.tahoe._ +import com.databricks.sql.transaction.tahoe.actions.FileAction +import com.databricks.sql.transaction.tahoe.constraints.{Constraint, DeltaInvariantCheckerExec} +import com.databricks.sql.transaction.tahoe.files.TahoeBatchFileIndex +import com.databricks.sql.transaction.tahoe.metering.DeltaLogging +import com.databricks.sql.transaction.tahoe.sources.DeltaSQLConf +import com.nvidia.spark.rapids._ + +import org.apache.spark.sql.{Dataset, SparkSession} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet, NamedExpression} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} +import org.apache.spark.sql.rapids.GpuShuffleEnv +import org.apache.spark.sql.rapids.GpuV1WriteUtils.GpuEmpty2Null +import org.apache.spark.sql.rapids.delta.{DeltaShufflePartitionsUtil, GpuOptimizeWriteExchangeExec, OptimizeWriteExchangeExec} +import org.apache.spark.sql.types.{StringType, StructType} +import org.apache.spark.util.Clock + +/** + * Used to perform a set of reads in a transaction and then commit a set of updates to the + * state of the log. All reads from the DeltaLog, MUST go through this instance rather + * than directly to the DeltaLog otherwise they will not be check for logical conflicts + * with concurrent updates. + * + * This class is not thread-safe. + * + * @param deltaLog The Delta Log for the table this transaction is modifying. + * @param snapshot The snapshot that this transaction is reading at. + * @param rapidsConf RAPIDS Accelerator config settings. 
+ */ +abstract class GpuOptimisticTransactionBase + (deltaLog: DeltaLog, snapshot: Snapshot, val rapidsConf: RapidsConf) + (implicit clock: Clock) + extends OptimisticTransaction(deltaLog, snapshot)(clock) + with DeltaLogging { + + /** + * Adds checking of constraints on the table + * @param plan Plan to generate the table to check against constraints + * @param constraints Constraints to check on the table + * @return GPU columnar plan to execute + */ + protected def addInvariantChecks(plan: SparkPlan, constraints: Seq[Constraint]): SparkPlan = { + val cpuInvariants = + DeltaInvariantCheckerExec.buildInvariantChecks(plan.output, constraints, plan.session) + GpuCheckDeltaInvariant.maybeConvertToGpu(cpuInvariants, rapidsConf) match { + case Some(gpuInvariants) => + val gpuPlan = convertToGpu(plan) + GpuDeltaInvariantCheckerExec(gpuPlan, gpuInvariants) + case None => + val cpuPlan = convertToCpu(plan) + DeltaInvariantCheckerExec(cpuPlan, constraints) + } + } + + /** GPU version of convertEmptyToNullIfNeeded */ + private def gpuConvertEmptyToNullIfNeeded( + plan: GpuExec, + partCols: Seq[Attribute], + constraints: Seq[Constraint]): SparkPlan = { + if (!spark.conf.get(DeltaSQLConf.CONVERT_EMPTY_TO_NULL_FOR_STRING_PARTITION_COL)) { + return plan + } + // No need to convert if there are no constraints. The empty strings will be converted later by + // FileFormatWriter and FileFormatDataWriter. Note that we might still do unnecessary convert + // here as the constraints might not be related to the string partition columns. A precise + // check will need to walk the constraints to see if such columns are really involved. It + // doesn't seem to worth the effort. + if (constraints.isEmpty) return plan + + val partSet = AttributeSet(partCols) + var needConvert = false + val projectList: Seq[NamedExpression] = plan.output.map { + case p if partSet.contains(p) && p.dataType == StringType => + needConvert = true + GpuAlias(GpuEmpty2Null(p), p.name)() + case attr => attr + } + if (needConvert) GpuProjectExec(projectList.toList, plan) else plan + } + + /** + * If there is any string partition column and there are constraints defined, add a projection to + * convert empty string to null for that column. The empty strings will be converted to null + * eventually even without this convert, but we want to do this earlier before check constraints + * so that empty strings are correctly rejected. Note that this should not cause the downstream + * logic in `FileFormatWriter` to add duplicate conversions because the logic there checks the + * partition column using the original plan's output. When the plan is modified with additional + * projections, the partition column check won't match and will not add more conversion. + * + * @param plan The original SparkPlan. + * @param partCols The partition columns. + * @param constraints The defined constraints. + * @return A SparkPlan potentially modified with an additional projection on top of `plan` + */ + override def convertEmptyToNullIfNeeded( + plan: SparkPlan, + partCols: Seq[Attribute], + constraints: Seq[Constraint]): SparkPlan = { + // Reuse the CPU implementation if the plan ends up on the CPU, otherwise do the + // equivalent on the GPU. 
+ plan match { + case g: GpuExec => gpuConvertEmptyToNullIfNeeded(g, partCols, constraints) + case _ => super.convertEmptyToNullIfNeeded(plan, partCols, constraints) + } + } + + override def writeFiles( + inputData: Dataset[_], + additionalConstraints: Seq[Constraint]): Seq[FileAction] = { + writeFiles(inputData, None, additionalConstraints) + } + + protected def applyOptimizeWriteIfNeeded( + spark: SparkSession, + physicalPlan: SparkPlan, + partitionSchema: StructType, + isOptimize: Boolean): SparkPlan = { + val optimizeWriteEnabled = !isOptimize && + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_OPTIMIZE_WRITE_ENABLED) + .orElse(DeltaConfigs.OPTIMIZE_WRITE.fromMetaData(metadata)).getOrElse(false) + if (optimizeWriteEnabled) { + val planWithoutTopRepartition = + DeltaShufflePartitionsUtil.removeTopRepartition(physicalPlan) + val partitioning = DeltaShufflePartitionsUtil.partitioningForRebalance( + physicalPlan.output, partitionSchema, spark.sessionState.conf.numShufflePartitions) + planWithoutTopRepartition match { + case p: GpuExec => + val partMeta = GpuOverrides.wrapPart(partitioning, rapidsConf, None) + partMeta.tagForGpu() + if (partMeta.canThisBeReplaced) { + val plan = GpuOptimizeWriteExchangeExec(partMeta.convertToGpu(), p) + if (GpuShuffleEnv.useGPUShuffle(rapidsConf)) { + GpuCoalesceBatches(plan, TargetSize(rapidsConf.gpuTargetBatchSizeBytes)) + } else { + GpuShuffleCoalesceExec(plan, rapidsConf.gpuTargetBatchSizeBytes) + } + } else { + GpuColumnarToRowExec(OptimizeWriteExchangeExec(partitioning, p)) + } + case p => + OptimizeWriteExchangeExec(partitioning, p) + } + } else { + physicalPlan + } + } + + protected def isOptimizeCommand(plan: LogicalPlan): Boolean = { + val leaves = plan.collectLeaves() + leaves.size == 1 && leaves.head.collect { + case LogicalRelation(HadoopFsRelation( + index: TahoeBatchFileIndex, _, _, _, _, _), _, _, _) => + index.actionType.equals("Optimize") + }.headOption.getOrElse(false) + } + + protected def convertToCpu(plan: SparkPlan): SparkPlan = plan match { + case GpuRowToColumnarExec(p, _) => p + case p: GpuExec => GpuColumnarToRowExec(p) + case p => p + } + + protected def convertToGpu(plan: SparkPlan): SparkPlan = plan match { + case GpuColumnarToRowExec(p, _) => p + case p: GpuExec => p + case p => GpuRowToColumnarExec(p, TargetSize(rapidsConf.gpuTargetBatchSizeBytes)) + } +} diff --git a/delta-lake/delta-spark341db/pom.xml b/delta-lake/delta-spark341db/pom.xml index eff6346fe5f..39e7c0b2dd4 100644 --- a/delta-lake/delta-spark341db/pom.xml +++ b/delta-lake/delta-spark341db/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.12 - 24.10.1 + 24.12.0 ../../shim-deps/pom.xml rapids-4-spark-delta-spark341db_2.12 RAPIDS Accelerator for Apache Spark Databricks 13.3 Delta Lake Support Databricks 13.3 Delta Lake support for the RAPIDS Accelerator for Apache Spark - 24.10.1 + 24.12.0 false diff --git a/delta-lake/delta-spark341db/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuOptimisticTransactionBase.scala b/delta-lake/delta-spark341db/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuOptimisticTransactionBase.scala new file mode 100644 index 00000000000..b6e9e11946d --- /dev/null +++ b/delta-lake/delta-spark341db/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuOptimisticTransactionBase.scala @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. 
+ * + * This file was derived from OptimisticTransaction.scala and TransactionalWrite.scala + * in the Delta Lake project at https://github.com/delta-io/delta. + * + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.databricks.sql.transaction.tahoe.rapids + +import com.databricks.sql.transaction.tahoe._ +import com.databricks.sql.transaction.tahoe.actions.FileAction +import com.databricks.sql.transaction.tahoe.constraints.{Constraint, DeltaInvariantCheckerExec} +import com.databricks.sql.transaction.tahoe.files.TahoeBatchFileIndex +import com.databricks.sql.transaction.tahoe.metering.DeltaLogging +import com.databricks.sql.transaction.tahoe.sources.DeltaSQLConf +import com.nvidia.spark.rapids._ + +import org.apache.spark.sql.{Dataset, SparkSession} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet, NamedExpression} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} +import org.apache.spark.sql.rapids.GpuShuffleEnv +import org.apache.spark.sql.rapids.GpuV1WriteUtils.GpuEmpty2Null +import org.apache.spark.sql.rapids.delta.{DeltaShufflePartitionsUtil, GpuOptimizeWriteExchangeExec, OptimizeWriteExchangeExec} +import org.apache.spark.sql.types.{StringType, StructType} +import org.apache.spark.util.Clock + +/** + * Used to perform a set of reads in a transaction and then commit a set of updates to the + * state of the log. All reads from the DeltaLog, MUST go through this instance rather + * than directly to the DeltaLog otherwise they will not be check for logical conflicts + * with concurrent updates. + * + * This class is not thread-safe. + * + * @param deltaLog The Delta Log for the table this transaction is modifying. + * @param snapshot The snapshot that this transaction is reading at. + * @param rapidsConf RAPIDS Accelerator config settings. 
+ */ +abstract class GpuOptimisticTransactionBase + (deltaLog: DeltaLog, snapshot: Snapshot, val rapidsConf: RapidsConf) + (implicit clock: Clock) + extends OptimisticTransaction(deltaLog, snapshot)(clock) + with DeltaLogging { + + /** + * Adds checking of constraints on the table + * @param plan Plan to generate the table to check against constraints + * @param constraints Constraints to check on the table + * @return GPU columnar plan to execute + */ + protected def addInvariantChecks(plan: SparkPlan, constraints: Seq[Constraint]): SparkPlan = { + val cpuInvariants = + DeltaInvariantCheckerExec.buildInvariantChecks(plan.output, constraints, plan.session) + GpuCheckDeltaInvariant.maybeConvertToGpu(cpuInvariants, rapidsConf) match { + case Some(gpuInvariants) => + val gpuPlan = convertToGpu(plan) + GpuDeltaInvariantCheckerExec(gpuPlan, gpuInvariants) + case None => + val cpuPlan = convertToCpu(plan) + DeltaInvariantCheckerExec(cpuPlan, constraints) + } + } + + /** GPU version of convertEmptyToNullIfNeeded */ + private def gpuConvertEmptyToNullIfNeeded( + plan: GpuExec, + partCols: Seq[Attribute], + constraints: Seq[Constraint]): SparkPlan = { + if (!spark.conf.get(DeltaSQLConf.CONVERT_EMPTY_TO_NULL_FOR_STRING_PARTITION_COL)) { + return plan + } + // No need to convert if there are no constraints. The empty strings will be converted later by + // FileFormatWriter and FileFormatDataWriter. Note that we might still do unnecessary convert + // here as the constraints might not be related to the string partition columns. A precise + // check will need to walk the constraints to see if such columns are really involved. It + // doesn't seem to worth the effort. + if (constraints.isEmpty) return plan + + val partSet = AttributeSet(partCols) + var needConvert = false + val projectList: Seq[NamedExpression] = plan.output.map { + case p if partSet.contains(p) && p.dataType == StringType => + needConvert = true + GpuAlias(GpuEmpty2Null(p), p.name)() + case attr => attr + } + if (needConvert) GpuProjectExec(projectList.toList, plan) else plan + } + + /** + * If there is any string partition column and there are constraints defined, add a projection to + * convert empty string to null for that column. The empty strings will be converted to null + * eventually even without this convert, but we want to do this earlier before check constraints + * so that empty strings are correctly rejected. Note that this should not cause the downstream + * logic in `FileFormatWriter` to add duplicate conversions because the logic there checks the + * partition column using the original plan's output. When the plan is modified with additional + * projections, the partition column check won't match and will not add more conversion. + * + * @param plan The original SparkPlan. + * @param partCols The partition columns. + * @param constraints The defined constraints. + * @return A SparkPlan potentially modified with an additional projection on top of `plan` + */ + override def convertEmptyToNullIfNeeded( + plan: SparkPlan, + partCols: Seq[Attribute], + constraints: Seq[Constraint]): SparkPlan = { + // Reuse the CPU implementation if the plan ends up on the CPU, otherwise do the + // equivalent on the GPU. 
+ plan match { + case g: GpuExec => gpuConvertEmptyToNullIfNeeded(g, partCols, constraints) + case _ => super.convertEmptyToNullIfNeeded(plan, partCols, constraints) + } + } + + override def writeFiles( + inputData: Dataset[_], + additionalConstraints: Seq[Constraint]): Seq[FileAction] = { + writeFiles(inputData, None, additionalConstraints) + } + + protected def applyOptimizeWriteIfNeeded( + spark: SparkSession, + physicalPlan: SparkPlan, + partitionSchema: StructType, + isOptimize: Boolean): SparkPlan = { + val optimizeWriteEnabled = !isOptimize && + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_OPTIMIZE_WRITE_ENABLED) + .orElse(DeltaConfigs.OPTIMIZE_WRITE.fromMetaData(metadata)).getOrElse(false) + if (optimizeWriteEnabled) { + val planWithoutTopRepartition = + DeltaShufflePartitionsUtil.removeTopRepartition(physicalPlan) + val partitioning = DeltaShufflePartitionsUtil.partitioningForRebalance( + physicalPlan.output, partitionSchema, spark.sessionState.conf.numShufflePartitions) + planWithoutTopRepartition match { + case p: GpuExec => + val partMeta = GpuOverrides.wrapPart(partitioning, rapidsConf, None) + partMeta.tagForGpu() + if (partMeta.canThisBeReplaced) { + val plan = GpuOptimizeWriteExchangeExec(partMeta.convertToGpu(), p) + if (GpuShuffleEnv.useGPUShuffle(rapidsConf)) { + GpuCoalesceBatches(plan, TargetSize(rapidsConf.gpuTargetBatchSizeBytes)) + } else { + GpuShuffleCoalesceExec(plan, rapidsConf.gpuTargetBatchSizeBytes) + } + } else { + GpuColumnarToRowExec(OptimizeWriteExchangeExec(partitioning, p)) + } + case p => + OptimizeWriteExchangeExec(partitioning, p) + } + } else { + physicalPlan + } + } + + protected def isOptimizeCommand(plan: LogicalPlan): Boolean = { + val leaves = plan.collectLeaves() + leaves.size == 1 && leaves.head.collect { + case LogicalRelation(HadoopFsRelation( + index: TahoeBatchFileIndex, _, _, _, _, _), _, _, _) => + index.actionType.equals("Optimize") + }.headOption.getOrElse(false) + } + + protected def convertToCpu(plan: SparkPlan): SparkPlan = plan match { + case GpuRowToColumnarExec(p, _) => p + case p: GpuExec => GpuColumnarToRowExec(p) + case p => p + } + + protected def convertToGpu(plan: SparkPlan): SparkPlan = plan match { + case GpuColumnarToRowExec(p, _) => p + case p: GpuExec => p + case p => GpuRowToColumnarExec(p, TargetSize(rapidsConf.gpuTargetBatchSizeBytes)) + } +} diff --git a/delta-lake/delta-spark341db/src/main/scala/com/nvidia/spark/rapids/delta/GpuDeltaParquetFileFormat.scala b/delta-lake/delta-spark341db/src/main/scala/com/nvidia/spark/rapids/delta/GpuDeltaParquetFileFormat.scala index e109b81f1e5..088a2a788da 100644 --- a/delta-lake/delta-spark341db/src/main/scala/com/nvidia/spark/rapids/delta/GpuDeltaParquetFileFormat.scala +++ b/delta-lake/delta-spark341db/src/main/scala/com/nvidia/spark/rapids/delta/GpuDeltaParquetFileFormat.scala @@ -113,7 +113,7 @@ object GpuDeltaParquetFileFormat { meta.willNotWorkOnGpu( s"reading metadata column $IS_ROW_DELETED_COLUMN_NAME is not supported") } - if (format.hasDeletionVectorMap()) { + if (format.hasDeletionVectorMap) { meta.willNotWorkOnGpu("deletion vectors are not supported") } } diff --git a/delta-lake/delta-spark350db143/pom.xml b/delta-lake/delta-spark350db143/pom.xml new file mode 100644 index 00000000000..333a035a680 --- /dev/null +++ b/delta-lake/delta-spark350db143/pom.xml @@ -0,0 +1,85 @@ + + + + 4.0.0 + + + com.nvidia + rapids-4-spark-shim-deps-parent_2.12 + 24.12.0 + ../../shim-deps/pom.xml + + + rapids-4-spark-delta-spark350db143_2.12 + RAPIDS Accelerator for Apache Spark 
Databricks 13.3 Delta Lake Support + Databricks 13.3 Delta Lake support for the RAPIDS Accelerator for Apache Spark + 24.12.0 + + + false + **/* + package + + + + + org.roaringbitmap + RoaringBitmap + + + com.nvidia + rapids-4-spark-sql_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + provided + + + + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-common-sources + generate-sources + + add-source + + + + ${project.basedir}/../common/src/main/scala + ${project.basedir}/../common/src/main/databricks/scala + + + + + + + net.alchim31.maven + scala-maven-plugin + + + org.apache.rat + apache-rat-plugin + + + + diff --git a/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuCreateDeltaTableCommand.scala b/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuCreateDeltaTableCommand.scala new file mode 100644 index 00000000000..ace7a13966c --- /dev/null +++ b/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuCreateDeltaTableCommand.scala @@ -0,0 +1,464 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * This file was derived from CreateDeltaTableCommand.scala in the + * Delta Lake project at https://github.com/delta-io/delta. + * + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.databricks.sql.transaction.tahoe.rapids + +import com.databricks.sql.transaction.tahoe._ +import com.databricks.sql.transaction.tahoe.actions.Metadata +import com.databricks.sql.transaction.tahoe.commands.{TableCreationModes, WriteIntoDelta} +import com.databricks.sql.transaction.tahoe.metering.DeltaLogging +import com.databricks.sql.transaction.tahoe.schema.SchemaUtils +import com.databricks.sql.transaction.tahoe.sources.DeltaSQLConf +import com.nvidia.spark.rapids.RapidsConf +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileSystem, Path} + +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType} +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.connector.catalog.Identifier +import org.apache.spark.sql.execution.command.{LeafRunnableCommand, RunnableCommand} +import org.apache.spark.sql.types.StructType + +/** + * Single entry point for all write or declaration operations for Delta tables accessed through + * the table name. + * + * @param table The table identifier for the Delta table + * @param existingTableOpt The existing table for the same identifier if exists + * @param mode The save mode when writing data. Relevant when the query is empty or set to Ignore + * with `CREATE TABLE IF NOT EXISTS`. + * @param query The query to commit into the Delta table if it exist. 
This can come from + * - CTAS + * - saveAsTable + */ +case class GpuCreateDeltaTableCommand( + table: CatalogTable, + existingTableOpt: Option[CatalogTable], + mode: SaveMode, + query: Option[LogicalPlan], + operation: TableCreationModes.CreationMode = TableCreationModes.Create, + tableByPath: Boolean = false, + override val output: Seq[Attribute] = Nil)(@transient rapidsConf: RapidsConf) + extends LeafRunnableCommand + with DeltaLogging { + + override def otherCopyArgs: Seq[AnyRef] = Seq(rapidsConf) + + override def run(sparkSession: SparkSession): Seq[Row] = { + val table = this.table + + assert(table.tableType != CatalogTableType.VIEW) + assert(table.identifier.database.isDefined, "Database should've been fixed at analysis") + // There is a subtle race condition here, where the table can be created by someone else + // while this command is running. Nothing we can do about that though :( + val tableExists = existingTableOpt.isDefined + if (mode == SaveMode.Ignore && tableExists) { + // Early exit on ignore + return Nil + } else if (mode == SaveMode.ErrorIfExists && tableExists) { + throw DeltaErrors.tableAlreadyExists(table) + } + + val tableWithLocation = if (tableExists) { + val existingTable = existingTableOpt.get + table.storage.locationUri match { + case Some(location) if location.getPath != existingTable.location.getPath => + throw DeltaErrors.tableLocationMismatch(table, existingTable) + case _ => + } + table.copy( + storage = existingTable.storage, + tableType = existingTable.tableType) + } else if (table.storage.locationUri.isEmpty) { + // We are defining a new managed table + assert(table.tableType == CatalogTableType.MANAGED) + val loc = sparkSession.sessionState.catalog.defaultTablePath(table.identifier) + table.copy(storage = table.storage.copy(locationUri = Some(loc))) + } else { + // 1. We are defining a new external table + // 2. It's a managed table which already has the location populated. This can happen in DSV2 + // CTAS flow. + table + } + + val isManagedTable = tableWithLocation.tableType == CatalogTableType.MANAGED + val tableLocation = new Path(tableWithLocation.location) + val gpuDeltaLog = GpuDeltaLog.forTable(sparkSession, tableLocation, rapidsConf) + val hadoopConf = gpuDeltaLog.deltaLog.newDeltaHadoopConf() + val fs = tableLocation.getFileSystem(hadoopConf) + val options = new DeltaOptions(table.storage.properties, sparkSession.sessionState.conf) + var result: Seq[Row] = Nil + + recordDeltaOperation(gpuDeltaLog.deltaLog, "delta.ddl.createTable") { + val txn = gpuDeltaLog.startTransaction() + val opStartTs = System.currentTimeMillis() + if (query.isDefined) { + // If the mode is Ignore or ErrorIfExists, the table must not exist, or we would return + // earlier. And the data should not exist either, to match the behavior of + // Ignore/ErrorIfExists mode. This means the table path should not exist or is empty. + if (mode == SaveMode.Ignore || mode == SaveMode.ErrorIfExists) { + assert(!tableExists) + // We may have failed a previous write. The retry should still succeed even if we have + // garbage data + if (txn.readVersion > -1 || !fs.exists(gpuDeltaLog.deltaLog.logPath)) { + assertPathEmpty(hadoopConf, tableWithLocation) + } + } + // We are either appending/overwriting with saveAsTable or creating a new table with CTAS or + // we are creating a table as part of a RunnableCommand + query.get match { + case writer: WriteIntoDelta => + // In the V2 Writer, methods like "replace" and "createOrReplace" implicitly mean that + // the metadata should be changed. 
This wasn't the behavior for DataFrameWriterV1. + if (!isV1Writer) { + replaceMetadataIfNecessary( + txn, tableWithLocation, options, writer.data.schema.asNullable) + } + val actions = writer.write(txn, sparkSession) + val op = getOperation(txn.metadata, isManagedTable, Some(options)) + txn.commit(actions, op) + case cmd: RunnableCommand => + result = cmd.run(sparkSession) + case other => + // When using V1 APIs, the `other` plan is not yet optimized, therefore, it is safe + // to once again go through analysis + val data = Dataset.ofRows(sparkSession, other) + + // In the V2 Writer, methods like "replace" and "createOrReplace" implicitly mean that + // the metadata should be changed. This wasn't the behavior for DataFrameWriterV1. + if (!isV1Writer) { + replaceMetadataIfNecessary( + txn, tableWithLocation, options, other.schema.asNullable) + } + + val actions = WriteIntoDelta( + deltaLog = gpuDeltaLog.deltaLog, + mode = mode, + options, + partitionColumns = table.partitionColumnNames, + configuration = tableWithLocation.properties + ("comment" -> table.comment.orNull), + data = data).write(txn, sparkSession) + + val op = getOperation(txn.metadata, isManagedTable, Some(options)) + txn.commit(actions, op) + } + } else { + def createTransactionLogOrVerify(): Unit = { + if (isManagedTable) { + // When creating a managed table, the table path should not exist or is empty, or + // users would be surprised to see the data, or see the data directory being dropped + // after the table is dropped. + assertPathEmpty(hadoopConf, tableWithLocation) + } + + // This is either a new table, or, we never defined the schema of the table. While it is + // unexpected that `txn.metadata.schema` to be empty when txn.readVersion >= 0, we still + // guard against it, in case of checkpoint corruption bugs. + val noExistingMetadata = txn.readVersion == -1 || txn.metadata.schema.isEmpty + if (noExistingMetadata) { + assertTableSchemaDefined(fs, tableLocation, tableWithLocation, txn, sparkSession) + assertPathEmpty(hadoopConf, tableWithLocation) + // This is a user provided schema. + // Doesn't come from a query, Follow nullability invariants. + val newMetadata = getProvidedMetadata(tableWithLocation, table.schema.json) + txn.updateMetadataForNewTable(newMetadata) + + val op = getOperation(newMetadata, isManagedTable, None) + txn.commit(Nil, op) + } else { + verifyTableMetadata(txn, tableWithLocation) + } + } + // We are defining a table using the Create or Replace Table statements. 
+ operation match { + case TableCreationModes.Create => + require(!tableExists, "Can't recreate a table when it exists") + createTransactionLogOrVerify() + + case TableCreationModes.CreateOrReplace if !tableExists => + // If the table doesn't exist, CREATE OR REPLACE must provide a schema + if (tableWithLocation.schema.isEmpty) { + throw DeltaErrors.schemaNotProvidedException + } + createTransactionLogOrVerify() + case _ => + // When the operation is a REPLACE or CREATE OR REPLACE, then the schema shouldn't be + // empty, since we'll use the entry to replace the schema + if (tableWithLocation.schema.isEmpty) { + throw DeltaErrors.schemaNotProvidedException + } + // We need to replace + replaceMetadataIfNecessary(txn, tableWithLocation, options, tableWithLocation.schema) + // Truncate the table + val operationTimestamp = System.currentTimeMillis() + val removes = txn.filterFiles().map(_.removeWithTimestamp(operationTimestamp)) + val op = getOperation(txn.metadata, isManagedTable, None) + txn.commit(removes, op) + } + } + + // We would have failed earlier on if we couldn't ignore the existence of the table + // In addition, we just might using saveAsTable to append to the table, so ignore the creation + // if it already exists. + // Note that someone may have dropped and recreated the table in a separate location in the + // meantime... Unfortunately we can't do anything there at the moment, because Hive sucks. + logInfo(s"Table is path-based table: $tableByPath. Update catalog with mode: $operation") + updateCatalog( + sparkSession, + tableWithLocation, + gpuDeltaLog.deltaLog.update(checkIfUpdatedSinceTs = Some(opStartTs)), + txn) + + result + } + } + + private def getProvidedMetadata(table: CatalogTable, schemaString: String): Metadata = { + Metadata( + description = table.comment.orNull, + schemaString = schemaString, + partitionColumns = table.partitionColumnNames, + configuration = table.properties, + createdTime = Some(System.currentTimeMillis())) + } + + private def assertPathEmpty( + hadoopConf: Configuration, + tableWithLocation: CatalogTable): Unit = { + val path = new Path(tableWithLocation.location) + val fs = path.getFileSystem(hadoopConf) + // Verify that the table location associated with CREATE TABLE doesn't have any data. Note that + // we intentionally diverge from this behavior w.r.t regular datasource tables (that silently + // overwrite any previous data) + if (fs.exists(path) && fs.listStatus(path).nonEmpty) { + throw DeltaErrors.createTableWithNonEmptyLocation( + tableWithLocation.identifier.toString, + tableWithLocation.location.toString) + } + } + + private def assertTableSchemaDefined( + fs: FileSystem, + path: Path, + table: CatalogTable, + txn: OptimisticTransaction, + sparkSession: SparkSession): Unit = { + // If we allow creating an empty schema table and indeed the table is new, we just need to + // make sure: + // 1. txn.readVersion == -1 to read a new table + // 2. for external tables: path must either doesn't exist or is completely empty + val allowCreatingTableWithEmptySchema = sparkSession.sessionState + .conf.getConf(DeltaSQLConf.DELTA_ALLOW_CREATE_EMPTY_SCHEMA_TABLE) && txn.readVersion == -1 + + // Users did not specify the schema. We expect the schema exists in Delta. 
+ if (table.schema.isEmpty) { + if (table.tableType == CatalogTableType.EXTERNAL) { + if (fs.exists(path) && fs.listStatus(path).nonEmpty) { + throw DeltaErrors.createExternalTableWithoutLogException( + path, table.identifier.quotedString, sparkSession) + } else { + if (allowCreatingTableWithEmptySchema) return + throw DeltaErrors.createExternalTableWithoutSchemaException( + path, table.identifier.quotedString, sparkSession) + } + } else { + if (allowCreatingTableWithEmptySchema) return + throw DeltaErrors.createManagedTableWithoutSchemaException( + table.identifier.quotedString, sparkSession) + } + } + } + + /** + * Verify against our transaction metadata that the user specified the right metadata for the + * table. + */ + private def verifyTableMetadata( + txn: OptimisticTransaction, + tableDesc: CatalogTable): Unit = { + val existingMetadata = txn.metadata + val path = new Path(tableDesc.location) + + // The delta log already exists. If they give any configuration, we'll make sure it all matches. + // Otherwise we'll just go with the metadata already present in the log. + // The schema compatibility checks will be made in `WriteIntoDelta` for CreateTable + // with a query + if (txn.readVersion > -1) { + if (tableDesc.schema.nonEmpty) { + // We check exact alignment on create table if everything is provided + // However, if in column mapping mode, we can safely ignore the related metadata fields in + // existing metadata because new table desc will not have related metadata assigned yet + val differences = SchemaUtils.reportDifferences( + DeltaColumnMapping.dropColumnMappingMetadata(existingMetadata.schema), + tableDesc.schema) + if (differences.nonEmpty) { + throw DeltaErrors.createTableWithDifferentSchemaException( + path, tableDesc.schema, existingMetadata.schema, differences) + } + } + + // If schema is specified, we must make sure the partitioning matches, even the partitioning + // is not specified. + if (tableDesc.schema.nonEmpty && + tableDesc.partitionColumnNames != existingMetadata.partitionColumns) { + throw DeltaErrors.createTableWithDifferentPartitioningException( + path, tableDesc.partitionColumnNames, existingMetadata.partitionColumns) + } + + if (tableDesc.properties.nonEmpty && tableDesc.properties != existingMetadata.configuration) { + throw DeltaErrors.createTableWithDifferentPropertiesException( + path, tableDesc.properties, existingMetadata.configuration) + } + } + } + + /** + * Based on the table creation operation, and parameters, we can resolve to different operations. + * A lot of this is needed for legacy reasons in Databricks Runtime. 
+ * @param metadata The table metadata, which we are creating or replacing + * @param isManagedTable Whether we are creating or replacing a managed table + * @param options Write options, if this was a CTAS/RTAS + */ + private def getOperation( + metadata: Metadata, + isManagedTable: Boolean, + options: Option[DeltaOptions]): DeltaOperations.Operation = operation match { + // This is legacy saveAsTable behavior in Databricks Runtime + case TableCreationModes.Create if existingTableOpt.isDefined && query.isDefined => + DeltaOperations.Write(mode, Option(table.partitionColumnNames), options.get.replaceWhere, + options.flatMap(_.userMetadata)) + + // DataSourceV2 table creation + // CREATE TABLE (non-DataFrameWriter API) doesn't have options syntax + // (userMetadata uses SQLConf in this case) + case TableCreationModes.Create => + DeltaOperations.CreateTable(metadata, isManagedTable, query.isDefined) + + // DataSourceV2 table replace + // REPLACE TABLE (non-DataFrameWriter API) doesn't have options syntax + // (userMetadata uses SQLConf in this case) + case TableCreationModes.Replace => + DeltaOperations.ReplaceTable(metadata, isManagedTable, orCreate = false, query.isDefined) + + // Legacy saveAsTable with Overwrite mode + case TableCreationModes.CreateOrReplace if options.exists(_.replaceWhere.isDefined) => + DeltaOperations.Write(mode, Option(table.partitionColumnNames), options.get.replaceWhere, + options.flatMap(_.userMetadata)) + + // New DataSourceV2 saveAsTable with overwrite mode behavior + case TableCreationModes.CreateOrReplace => + DeltaOperations.ReplaceTable(metadata, isManagedTable, orCreate = true, query.isDefined, + options.flatMap(_.userMetadata)) + } + + /** + * Similar to getOperation, here we disambiguate the catalog alterations we need to do based + * on the table operation, and whether we have reached here through legacy code or DataSourceV2 + * code paths. + */ + private def updateCatalog( + spark: SparkSession, + table: CatalogTable, + snapshot: Snapshot, + txn: OptimisticTransaction): Unit = { + val cleaned = cleanupTableDefinition(table, snapshot) + operation match { + case _ if tableByPath => // do nothing with the metastore if this is by path + case TableCreationModes.Create => + spark.sessionState.catalog.createTable( + cleaned, + ignoreIfExists = existingTableOpt.isDefined, + validateLocation = false) + case TableCreationModes.Replace | TableCreationModes.CreateOrReplace + if existingTableOpt.isDefined => + spark.sessionState.catalog.alterTable(table) + case TableCreationModes.Replace => + val ident = Identifier.of(table.identifier.database.toArray, table.identifier.table) + throw DeltaErrors.cannotReplaceMissingTableException(ident) + case TableCreationModes.CreateOrReplace => + spark.sessionState.catalog.createTable( + cleaned, + ignoreIfExists = false, + validateLocation = false) + } + } + + /** Clean up the information we pass on to store in the catalog. 
*/ + private def cleanupTableDefinition(table: CatalogTable, snapshot: Snapshot): CatalogTable = { + // These actually have no effect on the usability of Delta, but feature flagging legacy + // behavior for now + val storageProps = if (conf.getConf(DeltaSQLConf.DELTA_LEGACY_STORE_WRITER_OPTIONS_AS_PROPS)) { + // Legacy behavior + table.storage + } else { + table.storage.copy(properties = Map.empty) + } + + table.copy( + schema = new StructType(), + properties = Map.empty, + partitionColumnNames = Nil, + // Remove write specific options when updating the catalog + storage = storageProps, + tracksPartitionsInCatalog = true) + } + + /** + * With DataFrameWriterV2, methods like `replace()` or `createOrReplace()` mean that the + * metadata of the table should be replaced. If overwriteSchema=false is provided with these + * methods, then we will verify that the metadata match exactly. + */ + private def replaceMetadataIfNecessary( + txn: OptimisticTransaction, + tableDesc: CatalogTable, + options: DeltaOptions, + schema: StructType): Unit = { + val isReplace = (operation == TableCreationModes.CreateOrReplace || + operation == TableCreationModes.Replace) + // If a user explicitly specifies not to overwrite the schema, during a replace, we should + // tell them that it's not supported + val dontOverwriteSchema = options.options.contains(DeltaOptions.OVERWRITE_SCHEMA_OPTION) && + !options.canOverwriteSchema + if (isReplace && dontOverwriteSchema) { + throw DeltaErrors.illegalUsageException(DeltaOptions.OVERWRITE_SCHEMA_OPTION, "replacing") + } + if (txn.readVersion > -1L && isReplace && !dontOverwriteSchema) { + // When a table already exists, and we're using the DataFrameWriterV2 API to replace + // or createOrReplace a table, we blindly overwrite the metadata. + txn.updateMetadataForNewTable(getProvidedMetadata(table, schema.json)) + } + } + + /** + * Horrible hack to differentiate between DataFrameWriterV1 and V2 so that we can decide + * what to do with table metadata. In DataFrameWriterV1, mode("overwrite").saveAsTable, + * behaves as a CreateOrReplace table, but we have asked for "overwriteSchema" as an + * explicit option to overwrite partitioning or schema information. With DataFrameWriterV2, + * the behavior asked for by the user is clearer: .createOrReplace(), which means that we + * should overwrite schema and/or partitioning. Therefore we have this hack. + */ + private def isV1Writer: Boolean = { + Thread.currentThread().getStackTrace.exists(_.toString.contains( + classOf[DataFrameWriter[_]].getCanonicalName + ".")) + } +} diff --git a/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuDeleteCommand.scala b/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuDeleteCommand.scala new file mode 100644 index 00000000000..f49a42d2ed0 --- /dev/null +++ b/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuDeleteCommand.scala @@ -0,0 +1,377 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * This file was derived from DeleteCommand.scala + * in the Delta Lake project at https://github.com/delta-io/delta. + * + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.databricks.sql.transaction.tahoe.rapids + +import com.databricks.sql.transaction.tahoe.{DeltaConfigs, DeltaLog, DeltaOperations, DeltaTableUtils, DeltaUDF, OptimisticTransaction} +import com.databricks.sql.transaction.tahoe.DeltaCommitTag._ +import com.databricks.sql.transaction.tahoe.RowTracking +import com.databricks.sql.transaction.tahoe.actions.{AddCDCFile, FileAction} +import com.databricks.sql.transaction.tahoe.commands.{DeleteCommandMetrics, DeleteMetric, DeltaCommand, DMLUtils} +import com.databricks.sql.transaction.tahoe.commands.MergeIntoCommandBase.totalBytesAndDistinctPartitionValues +import com.databricks.sql.transaction.tahoe.files.TahoeBatchFileIndex +import com.databricks.sql.transaction.tahoe.rapids.GpuDeleteCommand.{rewritingFilesMsg, FINDING_TOUCHED_FILES_MSG} +import com.nvidia.spark.rapids.delta.GpuDeltaMetricUpdateUDF + +import org.apache.spark.SparkContext +import org.apache.spark.sql.{Column, DataFrame, Dataset, Row, SparkSession} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, EqualNullSafe, Expression, If, Literal, Not} +import org.apache.spark.sql.catalyst.plans.QueryPlan +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.execution.SQLExecution +import org.apache.spark.sql.execution.command.LeafRunnableCommand +import org.apache.spark.sql.execution.metric.SQLMetrics +import org.apache.spark.sql.functions.input_file_name +import org.apache.spark.sql.types.LongType + +/** + * GPU version of Delta Lake DeleteCommand. + * + * Performs a Delete based on the search condition + * + * Algorithm: + * 1) Scan all the files and determine which files have + * the rows that need to be deleted. + * 2) Traverse the affected files and rebuild the touched files. + * 3) Use the Delta protocol to atomically write the remaining rows to new files and remove + * the affected files that are identified in step 1. 
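Editorial sketch (not part of this change): the heart of step 2 above is that a touched file is rebuilt by keeping only the rows for which the DELETE condition does not hold, using a null-safe comparison so that rows where the condition evaluates to null survive, mirroring the Not(EqualNullSafe(cond, TrueLiteral)) filter used later in rewriteFiles. The self-contained Spark snippet below shows that filter with hypothetical table and column names.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, not}

object DeleteRewriteSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("delete-rewrite").getOrCreate()
    import spark.implicits._

    // Rows of one "touched" file; a null status leaves the DELETE condition undecided.
    val touchedFileRows = Seq(
      (1, Option("obsolete")), (2, Option("active")), (3, Option.empty[String])
    ).toDF("id", "status")

    // DELETE WHERE status = 'obsolete': keep every row whose condition is not null-safe-equal
    // to true, so the row with a null status survives rather than being deleted.
    val deleteCond = col("status") === "obsolete"
    val survivors = touchedFileRows.where(not(deleteCond <=> true))
    survivors.show() // rows 2 and 3 remain; they would be written into the replacement file
    spark.stop()
  }
}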
+ */ +case class GpuDeleteCommand( + gpuDeltaLog: GpuDeltaLog, + target: LogicalPlan, + condition: Option[Expression]) + extends LeafRunnableCommand with DeltaCommand with DeleteCommandMetrics { + + override def innerChildren: Seq[QueryPlan[_]] = Seq(target) + + override val output: Seq[Attribute] = Seq(AttributeReference("num_affected_rows", LongType)()) + + @transient private lazy val sc: SparkContext = SparkContext.getOrCreate() + + // DeleteCommandMetrics does not include deletion vector metrics, so add them here because + // the commit command needs to collect these metrics for inclusion in the delta log event + override lazy val metrics = createMetrics ++ Map( + "numDeletionVectorsAdded" -> SQLMetrics.createMetric(sc, "number of deletion vectors added."), + "numDeletionVectorsRemoved" -> + SQLMetrics.createMetric(sc, "number of deletion vectors removed."), + "numDeletionVectorsUpdated" -> + SQLMetrics.createMetric(sc, "number of deletion vectors updated.") + ) + + final override def run(sparkSession: SparkSession): Seq[Row] = { + val deltaLog = gpuDeltaLog.deltaLog + recordDeltaOperation(gpuDeltaLog.deltaLog, "delta.dml.delete") { + gpuDeltaLog.withNewTransaction { txn => + DeltaLog.assertRemovable(txn.snapshot) + val deleteCommitTags = performDelete(sparkSession, deltaLog, txn) + val deleteActions = deleteCommitTags.actions + if (deleteActions.nonEmpty) { + txn.commitIfNeeded(deleteActions, DeltaOperations.Delete(condition.toSeq), + deleteCommitTags.stringTags) + } + } + // Re-cache all cached plans(including this relation itself, if it's cached) that refer to + // this data source relation. + sparkSession.sharedState.cacheManager.recacheByPlan(sparkSession, target) + } + + // Adjust for deletes at partition boundaries. Deletes at partition boundaries is a metadata + // operation, therefore we don't actually have any information around how many rows were deleted + // While this info may exist in the file statistics, it's not guaranteed that we have these + // statistics. 
To avoid any performance regressions, we currently just return a -1 in such cases + if (metrics("numRemovedFiles").value > 0 && metrics("numDeletedRows").value == 0) { + Seq(Row(-1L)) + } else { + Seq(Row(metrics("numDeletedRows").value)) + } + } + + def performDelete( + sparkSession: SparkSession, + deltaLog: DeltaLog, + txn: OptimisticTransaction): DMLUtils.TaggedCommitData = { + import com.databricks.sql.transaction.tahoe.implicits._ + + var numRemovedFiles: Long = 0 + var numAddedFiles: Long = 0 + var numAddedChangeFiles: Long = 0 + var scanTimeMs: Long = 0 + var rewriteTimeMs: Long = 0 + var numBytesAdded: Long = 0 + var changeFileBytes: Long = 0 + var numBytesRemoved: Long = 0 + var numFilesBeforeSkipping: Long = 0 + var numBytesBeforeSkipping: Long = 0 + var numFilesAfterSkipping: Long = 0 + var numBytesAfterSkipping: Long = 0 + var numPartitionsAfterSkipping: Option[Long] = None + var numPartitionsRemovedFrom: Option[Long] = None + var numPartitionsAddedTo: Option[Long] = None + var numDeletedRows: Option[Long] = None + var numCopiedRows: Option[Long] = None + + val startTime = System.nanoTime() + val numFilesTotal = txn.snapshot.numOfFiles + + val deleteActions: Seq[FileAction] = condition match { + case None => + // Case 1: Delete the whole table if the condition is true + val allFiles = txn.filterFiles(Nil) + + numRemovedFiles = allFiles.size + scanTimeMs = (System.nanoTime() - startTime) / 1000 / 1000 + val (numBytes, numPartitions) = totalBytesAndDistinctPartitionValues(allFiles) + numBytesRemoved = numBytes + numFilesBeforeSkipping = numRemovedFiles + numBytesBeforeSkipping = numBytes + numFilesAfterSkipping = numRemovedFiles + numBytesAfterSkipping = numBytes + if (txn.metadata.partitionColumns.nonEmpty) { + numPartitionsAfterSkipping = Some(numPartitions) + numPartitionsRemovedFrom = Some(numPartitions) + numPartitionsAddedTo = Some(0) + } + val operationTimestamp = System.currentTimeMillis() + allFiles.map(_.removeWithTimestamp(operationTimestamp)) + case Some(cond) => + val (metadataPredicates, otherPredicates) = + DeltaTableUtils.splitMetadataAndDataPredicates( + cond, txn.metadata.partitionColumns, sparkSession) + + numFilesBeforeSkipping = txn.snapshot.numOfFiles + numBytesBeforeSkipping = txn.snapshot.sizeInBytes + + if (otherPredicates.isEmpty) { + // Case 2: The condition can be evaluated using metadata only. + // Delete a set of files without the need of scanning any data files. + val operationTimestamp = System.currentTimeMillis() + val candidateFiles = txn.filterFiles(metadataPredicates) + + scanTimeMs = (System.nanoTime() - startTime) / 1000 / 1000 + numRemovedFiles = candidateFiles.size + numBytesRemoved = candidateFiles.map(_.size).sum + numFilesAfterSkipping = candidateFiles.size + val (numCandidateBytes, numCandidatePartitions) = + totalBytesAndDistinctPartitionValues(candidateFiles) + numBytesAfterSkipping = numCandidateBytes + if (txn.metadata.partitionColumns.nonEmpty) { + numPartitionsAfterSkipping = Some(numCandidatePartitions) + numPartitionsRemovedFrom = Some(numCandidatePartitions) + numPartitionsAddedTo = Some(0) + } + candidateFiles.map(_.removeWithTimestamp(operationTimestamp)) + } else { + // Case 3: Delete the rows based on the condition. 
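Editorial sketch (not part of this change): the case split above hinges on whether the DELETE condition references only partition columns (Case 2, answered purely from metadata) or also data columns (Case 3, which forces a scan and rewrite). The toy snippet below models that decision with plain Scala stand-ins for Catalyst predicates; the Predicate class and column names are hypothetical.

object DeletePredicateSplitSketch {
  // A predicate is modelled only by the column names it references.
  final case class Predicate(sql: String, references: Set[String])

  // Predicates that touch only partition columns can be answered from file metadata (Case 2);
  // anything else forces the scan-and-rewrite path (Case 3).
  def split(predicates: Seq[Predicate], partitionCols: Set[String])
      : (Seq[Predicate], Seq[Predicate]) =
    predicates.partition(_.references.subsetOf(partitionCols))

  def main(args: Array[String]): Unit = {
    val preds = Seq(
      Predicate("date = '2024-12-01'", Set("date")),
      Predicate("id % 2 = 0", Set("id")))
    val (metadataOnly, needsScan) = split(preds, partitionCols = Set("date"))
    println(s"metadata-only: $metadataOnly") // handled like Case 2
    println(s"needs rewrite: $needsScan")    // drives Case 3
  }
}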
+ val candidateFiles = txn.filterFiles(metadataPredicates ++ otherPredicates) + + numFilesAfterSkipping = candidateFiles.size + val (numCandidateBytes, numCandidatePartitions) = + totalBytesAndDistinctPartitionValues(candidateFiles) + numBytesAfterSkipping = numCandidateBytes + if (txn.metadata.partitionColumns.nonEmpty) { + numPartitionsAfterSkipping = Some(numCandidatePartitions) + } + + val nameToAddFileMap = generateCandidateFileMap(deltaLog.dataPath, candidateFiles) + + val fileIndex = new TahoeBatchFileIndex( + sparkSession, "delete", candidateFiles, deltaLog, deltaLog.dataPath, txn.snapshot) + // Keep everything from the resolved target except a new TahoeFileIndex + // that only involves the affected files instead of all files. + val newTarget = DeltaTableUtils.replaceFileIndex(target, fileIndex) + val data = Dataset.ofRows(sparkSession, newTarget) + val deletedRowCount = metrics("numDeletedRows") + val deletedRowUdf = DeltaUDF.boolean { + new GpuDeltaMetricUpdateUDF(deletedRowCount) + }.asNondeterministic() + val filesToRewrite = + withStatusCode("DELTA", FINDING_TOUCHED_FILES_MSG) { + if (candidateFiles.isEmpty) { + Array.empty[String] + } else { + data.filter(new Column(cond)) + .select(input_file_name()) + .filter(deletedRowUdf()) + .distinct() + .as[String] + .collect() + } + } + + numRemovedFiles = filesToRewrite.length + scanTimeMs = (System.nanoTime() - startTime) / 1000 / 1000 + if (filesToRewrite.isEmpty) { + // Case 3.1: no row matches and no delete will be triggered + if (txn.metadata.partitionColumns.nonEmpty) { + numPartitionsRemovedFrom = Some(0) + numPartitionsAddedTo = Some(0) + } + Nil + } else { + // Case 3.2: some files need an update to remove the deleted files + // Do the second pass and just read the affected files + val baseRelation = buildBaseRelation( + sparkSession, txn, "delete", deltaLog.dataPath, filesToRewrite, nameToAddFileMap) + // Keep everything from the resolved target except a new TahoeFileIndex + // that only involves the affected files instead of all files. 
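Editorial sketch (not part of this change): the "finding touched files" step above tags every row that matches the delete condition with input_file_name() and collects the distinct file names, so that only those files are rewritten. The self-contained snippet below reproduces that pattern on a throwaway parquet table; the path and column names are hypothetical.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, input_file_name}

object FindTouchedFilesSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("touched-files").getOrCreate()
    import spark.implicits._

    // Write a tiny table so the example is self-contained; the path is arbitrary.
    Seq((1, "obsolete"), (2, "active"), (3, "obsolete"))
      .toDF("id", "status")
      .write.mode("overwrite").parquet("/tmp/touched_files_demo")

    // Tag each row matching the DELETE predicate with the file it came from and
    // collect the distinct file names: only these files need to be rewritten.
    val touched: Array[String] = spark.read.parquet("/tmp/touched_files_demo")
      .filter(col("status") === "obsolete")
      .select(input_file_name())
      .distinct()
      .as[String]
      .collect()

    touched.foreach(println)
    spark.stop()
  }
}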
+ val newTarget = DeltaTableUtils.replaceFileIndex(target, baseRelation.location) + val targetDF = Dataset.ofRows(sparkSession, newTarget) + val filterCond = Not(EqualNullSafe(cond, Literal.TrueLiteral)) + val rewrittenActions = rewriteFiles(txn, targetDF, filterCond, filesToRewrite.length) + val (changeFiles, rewrittenFiles) = rewrittenActions + .partition(_.isInstanceOf[AddCDCFile]) + numAddedFiles = rewrittenFiles.size + val removedFiles = filesToRewrite.map(f => + getTouchedFile(deltaLog.dataPath, f, nameToAddFileMap)) + val (removedBytes, removedPartitions) = + totalBytesAndDistinctPartitionValues(removedFiles) + numBytesRemoved = removedBytes + val (rewrittenBytes, rewrittenPartitions) = + totalBytesAndDistinctPartitionValues(rewrittenFiles) + numBytesAdded = rewrittenBytes + if (txn.metadata.partitionColumns.nonEmpty) { + numPartitionsRemovedFrom = Some(removedPartitions) + numPartitionsAddedTo = Some(rewrittenPartitions) + } + numAddedChangeFiles = changeFiles.size + changeFileBytes = changeFiles.collect { case f: AddCDCFile => f.size }.sum + rewriteTimeMs = (System.nanoTime() - startTime) / 1000 / 1000 - scanTimeMs + numDeletedRows = Some(metrics("numDeletedRows").value) + numCopiedRows = Some(metrics("numTouchedRows").value - metrics("numDeletedRows").value) + + val operationTimestamp = System.currentTimeMillis() + removeFilesFromPaths(deltaLog, nameToAddFileMap, filesToRewrite, + operationTimestamp) ++ rewrittenActions + } + } + } + metrics("numRemovedFiles").set(numRemovedFiles) + metrics("numAddedFiles").set(numAddedFiles) + val executionTimeMs = (System.nanoTime() - startTime) / 1000 / 1000 + metrics("executionTimeMs").set(executionTimeMs) + metrics("scanTimeMs").set(scanTimeMs) + metrics("rewriteTimeMs").set(rewriteTimeMs) + metrics("numAddedChangeFiles").set(numAddedChangeFiles) + metrics("changeFileBytes").set(changeFileBytes) + metrics("numAddedBytes").set(numBytesAdded) + metrics("numRemovedBytes").set(numBytesRemoved) + metrics("numFilesBeforeSkipping").set(numFilesBeforeSkipping) + metrics("numBytesBeforeSkipping").set(numBytesBeforeSkipping) + metrics("numFilesAfterSkipping").set(numFilesAfterSkipping) + metrics("numBytesAfterSkipping").set(numBytesAfterSkipping) + numPartitionsAfterSkipping.foreach(metrics("numPartitionsAfterSkipping").set) + numPartitionsAddedTo.foreach(metrics("numPartitionsAddedTo").set) + numPartitionsRemovedFrom.foreach(metrics("numPartitionsRemovedFrom").set) + numCopiedRows.foreach(metrics("numCopiedRows").set) + metrics("numDeletionVectorsAdded").set(0) + metrics("numDeletionVectorsRemoved").set(0) + metrics("numDeletionVectorsUpdated").set(0) + txn.registerSQLMetrics(sparkSession, metrics) + // This is needed to make the SQL metrics visible in the Spark UI + val executionId = sparkSession.sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY) + SQLMetrics.postDriverMetricUpdates( + sparkSession.sparkContext, executionId, metrics.values.toSeq) + + recordDeltaEvent( + deltaLog, + "delta.dml.delete.stats", + data = DeleteMetric( + condition = condition.map(_.sql).getOrElse("true"), + numFilesTotal, + numFilesAfterSkipping, + numAddedFiles, + numRemovedFiles, + numAddedFiles, + numAddedChangeFiles = numAddedChangeFiles, + numFilesBeforeSkipping, + numBytesBeforeSkipping, + numFilesAfterSkipping, + numBytesAfterSkipping, + numPartitionsAfterSkipping, + numPartitionsAddedTo, + numPartitionsRemovedFrom, + numCopiedRows, + numDeletedRows, + numBytesAdded, + numBytesRemoved, + changeFileBytes = changeFileBytes, + scanTimeMs, + rewriteTimeMs, + 
// We don't support deletion vectors + numDeletionVectorsAdded = 0, + numDeletionVectorsRemoved = 0, + numDeletionVectorsUpdated = 0) + + ) + + DMLUtils.TaggedCommitData(deleteActions) + .withTag(PreservedRowTrackingTag, RowTracking.isEnabled(txn.protocol, txn.metadata)) + .withTag(NoRowsCopiedTag, metrics("numCopiedRows").value == 0) + } + + /** + * Returns the list of `AddFile`s and `AddCDCFile`s that have been re-written. + */ + private def rewriteFiles( + txn: OptimisticTransaction, + baseData: DataFrame, + filterCondition: Expression, + numFilesToRewrite: Long): Seq[FileAction] = { + val shouldWriteCdc = DeltaConfigs.CHANGE_DATA_FEED.fromMetaData(txn.metadata) + + // number of total rows that we have seen / are either copying or deleting (sum of both). + val numTouchedRows = metrics("numTouchedRows") + val numTouchedRowsUdf = DeltaUDF.boolean { + new GpuDeltaMetricUpdateUDF(numTouchedRows) + }.asNondeterministic() + + withStatusCode( + "DELTA", rewritingFilesMsg(numFilesToRewrite)) { + val dfToWrite = if (shouldWriteCdc) { + import com.databricks.sql.transaction.tahoe.commands.cdc.CDCReader._ + // The logic here ends up being surprisingly elegant, with all source rows ending up in + // the output. Recall that we flipped the user-provided delete condition earlier, before the + // call to `rewriteFiles`. All rows which match this latest `filterCondition` are retained + // as table data, while all rows which don't match are removed from the rewritten table data + // but do get included in the output as CDC events. + baseData + .filter(numTouchedRowsUdf()) + .withColumn( + CDC_TYPE_COLUMN_NAME, + new Column(If(filterCondition, CDC_TYPE_NOT_CDC, CDC_TYPE_DELETE)) + ) + } else { + baseData + .filter(numTouchedRowsUdf()) + .filter(new Column(filterCondition)) + } + + txn.writeFiles(dfToWrite) + } + } +} + +object GpuDeleteCommand { + val FINDING_TOUCHED_FILES_MSG: String = "Finding files to rewrite for DELETE operation" + + def rewritingFilesMsg(numFilesToRewrite: Long): String = + s"Rewriting $numFilesToRewrite files for DELETE operation" +} diff --git a/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuDeltaCatalog.scala b/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuDeltaCatalog.scala new file mode 100644 index 00000000000..bd1260857ed --- /dev/null +++ b/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuDeltaCatalog.scala @@ -0,0 +1,218 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * This file was derived from DeltaDataSource.scala in the + * Delta Lake project at https://github.com/delta-io/delta. + * + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.databricks.sql.transaction.tahoe.rapids + +import java.util + +import com.databricks.sql.transaction.tahoe.{DeltaConfigs, DeltaErrors} +import com.databricks.sql.transaction.tahoe.commands.TableCreationModes +import com.databricks.sql.transaction.tahoe.metering.DeltaLogging +import com.databricks.sql.transaction.tahoe.sources.DeltaSourceUtils +import com.nvidia.spark.rapids.RapidsConf + +import org.apache.spark.sql.{AnalysisException, DataFrame, SaveMode} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.connector.catalog.{Identifier, StagedTable, StagingTableCatalog, Table} +import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.execution.command.LeafRunnableCommand +import org.apache.spark.sql.execution.datasources.PartitioningUtils +import org.apache.spark.sql.types.StructType + +class GpuDeltaCatalog( + override val cpuCatalog: StagingTableCatalog, + override val rapidsConf: RapidsConf) + extends GpuDeltaCatalogBase with SupportsPathIdentifier with DeltaLogging { + + override protected def buildGpuCreateDeltaTableCommand( + rapidsConf: RapidsConf, + table: CatalogTable, + existingTableOpt: Option[CatalogTable], + mode: SaveMode, + query: Option[LogicalPlan], + operation: TableCreationModes.CreationMode, + tableByPath: Boolean): LeafRunnableCommand = { + GpuCreateDeltaTableCommand( + table, + existingTableOpt, + mode, + query, + operation, + tableByPath = tableByPath + )(rapidsConf) + } + + override protected def getExistingTableIfExists(table: TableIdentifier): Option[CatalogTable] = { + // If this is a path identifier, we cannot return an existing CatalogTable. The Create command + // will check the file system itself + if (isPathIdentifier(table)) return None + val tableExists = catalog.tableExists(table) + if (tableExists) { + val oldTable = catalog.getTableMetadata(table) + if (oldTable.tableType == CatalogTableType.VIEW) { + throw new AnalysisException( + s"$table is a view. You may not write data into a view.") + } + if (!DeltaSourceUtils.isDeltaTable(oldTable.provider)) { + throw new AnalysisException(s"$table is not a Delta table. 
Please drop this " + + "table first if you would like to recreate it with Delta Lake.") + } + Some(oldTable) + } else { + None + } + } + + override protected def verifyTableAndSolidify( + tableDesc: CatalogTable, + query: Option[LogicalPlan]): CatalogTable = { + + if (tableDesc.bucketSpec.isDefined) { + throw DeltaErrors.operationNotSupportedException("Bucketing", tableDesc.identifier) + } + + val schema = query.map { plan => + assert(tableDesc.schema.isEmpty, "Can't specify table schema in CTAS.") + plan.schema.asNullable + }.getOrElse(tableDesc.schema) + + PartitioningUtils.validatePartitionColumn( + schema, + tableDesc.partitionColumnNames, + caseSensitive = false) // Delta is case insensitive + + val validatedConfigurations = DeltaConfigs.validateConfigurations(tableDesc.properties) + + val db = tableDesc.identifier.database.getOrElse(catalog.getCurrentDatabase) + val tableIdentWithDB = tableDesc.identifier.copy(database = Some(db)) + tableDesc.copy( + identifier = tableIdentWithDB, + schema = schema, + properties = validatedConfigurations) + } + + override protected def createGpuStagedDeltaTableV2( + ident: Identifier, + schema: StructType, + partitions: Array[Transform], + properties: util.Map[String, String], + operation: TableCreationModes.CreationMode): StagedTable = { + new GpuStagedDeltaTableV2WithLogging(ident, schema, partitions, properties, operation) + } + + override def loadTable(ident: Identifier, timestamp: Long): Table = { + cpuCatalog.loadTable(ident, timestamp) + } + + override def loadTable(ident: Identifier, version: String): Table = { + cpuCatalog.loadTable(ident, version) + } + + /** + * Creates a Delta table using GPU for writing the data + * + * @param ident The identifier of the table + * @param schema The schema of the table + * @param partitions The partition transforms for the table + * @param allTableProperties The table properties that configure the behavior of the table or + * provide information about the table + * @param writeOptions Options specific to the write during table creation or replacement + * @param sourceQuery A query if this CREATE request came from a CTAS or RTAS + * @param operation The specific table creation mode, whether this is a + * Create/Replace/Create or Replace + */ + override def createDeltaTable( + ident: Identifier, + schema: StructType, + partitions: Array[Transform], + allTableProperties: util.Map[String, String], + writeOptions: Map[String, String], + sourceQuery: Option[DataFrame], + operation: TableCreationModes.CreationMode + ): Table = recordFrameProfile( + "DeltaCatalog", "createDeltaTable") { + super.createDeltaTable( + ident, + schema, + partitions, + allTableProperties, + writeOptions, + sourceQuery, + operation) + } + + override def createTable( + ident: Identifier, + schema: StructType, + partitions: Array[Transform], + properties: util.Map[String, String]): Table = + recordFrameProfile("DeltaCatalog", "createTable") { + super.createTable(ident, schema, partitions, properties) + } + + override def stageReplace( + ident: Identifier, + schema: StructType, + partitions: Array[Transform], + properties: util.Map[String, String]): StagedTable = + recordFrameProfile("DeltaCatalog", "stageReplace") { + super.stageReplace(ident, schema, partitions, properties) + } + + override def stageCreateOrReplace( + ident: Identifier, + schema: StructType, + partitions: Array[Transform], + properties: util.Map[String, String]): StagedTable = + recordFrameProfile("DeltaCatalog", "stageCreateOrReplace") { + 
super.stageCreateOrReplace(ident, schema, partitions, properties) + } + + override def stageCreate( + ident: Identifier, + schema: StructType, + partitions: Array[Transform], + properties: util.Map[String, String]): StagedTable = + recordFrameProfile("DeltaCatalog", "stageCreate") { + super.stageCreate(ident, schema, partitions, properties) + } + + /** + * A staged Delta table, which creates a HiveMetaStore entry and appends data if this was a + * CTAS/RTAS command. We have a ugly way of using this API right now, but it's the best way to + * maintain old behavior compatibility between Databricks Runtime and OSS Delta Lake. + */ + protected class GpuStagedDeltaTableV2WithLogging( + ident: Identifier, + schema: StructType, + partitions: Array[Transform], + properties: util.Map[String, String], + operation: TableCreationModes.CreationMode) + extends GpuStagedDeltaTableV2(ident, schema, partitions, properties, operation) { + + override def commitStagedChanges(): Unit = recordFrameProfile( + "DeltaCatalog", "commitStagedChanges") { + super.commitStagedChanges() + } + } +} diff --git a/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuDoAutoCompaction.scala b/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuDoAutoCompaction.scala new file mode 100644 index 00000000000..6b7b24bbefb --- /dev/null +++ b/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuDoAutoCompaction.scala @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * This file was derived from DoAutoCompaction.scala + * from https://github.com/delta-io/delta/pull/1156 + * in the Delta Lake project at https://github.com/delta-io/delta. + * + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.databricks.sql.transaction.tahoe.rapids + +import com.databricks.sql.transaction.tahoe._ +import com.databricks.sql.transaction.tahoe.actions.Action +import com.databricks.sql.transaction.tahoe.hooks.PostCommitHook +import com.databricks.sql.transaction.tahoe.metering.DeltaLogging + +import org.apache.spark.sql.SparkSession + +object GpuDoAutoCompaction extends PostCommitHook + with DeltaLogging + with Serializable { + override val name: String = "Triggers compaction if necessary" + + override def run(spark: SparkSession, + txn: OptimisticTransactionImpl, + committedVersion: Long, + postCommitSnapshot: Snapshot, + committedActions: Seq[Action]): Unit = { + val gpuTxn = txn.asInstanceOf[GpuOptimisticTransaction] + val newTxn = new GpuDeltaLog(gpuTxn.deltaLog, gpuTxn.rapidsConf).startTransaction() + // Note: The Databricks AutoCompact PostCommitHook cannot be used here + // (with a GpuOptimisticTransaction). It appears that AutoCompact creates a new transaction, + // thereby circumventing GpuOptimisticTransaction (which intercepts Parquet writes + // to go through the GPU). 
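Editorial sketch (not part of this change): GpuDoAutoCompaction is registered as a post-commit hook, that is, a callback the transaction invokes after a successful commit, with handleError translating failures into a post-commit-hook exception. The plain-Scala snippet below models that pattern in miniature; the trait and class names are hypothetical.

object PostCommitHookSketch {
  trait PostCommitHook {
    def name: String
    def run(committedVersion: Long): Unit
    def handleError(error: Throwable, version: Long): Unit =
      throw new RuntimeException(s"post-commit hook '$name' failed at version $version", error)
  }

  final class CompactIfNeeded(smallFileCount: () => Int, threshold: Int) extends PostCommitHook {
    override val name: String = "Triggers compaction if necessary"
    override def run(committedVersion: Long): Unit =
      if (smallFileCount() >= threshold) println(s"compacting after commit $committedVersion")
  }

  def commitWithHooks(version: Long, hooks: Seq[PostCommitHook]): Unit =
    hooks.foreach { h =>
      try h.run(version) catch { case e: Throwable => h.handleError(e, version) }
    }

  def main(args: Array[String]): Unit =
    commitWithHooks(42L, Seq(new CompactIfNeeded(() => 12, threshold = 10)))
}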
+ new GpuOptimizeExecutor(spark, newTxn, Seq.empty, Seq.empty, committedActions).optimize() + } + + override def handleError(error: Throwable, version: Long): Unit = + throw DeltaErrors.postCommitHookFailedException(this, version, name, error) +} diff --git a/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuLowShuffleMergeCommand.scala b/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuLowShuffleMergeCommand.scala new file mode 100644 index 00000000000..fddebda33bd --- /dev/null +++ b/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuLowShuffleMergeCommand.scala @@ -0,0 +1,1083 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * This file was derived from MergeIntoCommand.scala + * in the Delta Lake project at https://github.com/delta-io/delta. + * + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.databricks.sql.transaction.tahoe.rapids + +import java.net.URI +import java.util.concurrent.TimeUnit + +import scala.collection.mutable + +import com.databricks.sql.io.RowIndexFilterType +import com.databricks.sql.transaction.tahoe._ +import com.databricks.sql.transaction.tahoe.DeltaOperations.MergePredicate +import com.databricks.sql.transaction.tahoe.DeltaParquetFileFormat.DeletionVectorDescriptorWithFilterType +import com.databricks.sql.transaction.tahoe.actions.{AddCDCFile, AddFile, DeletionVectorDescriptor, FileAction} +import com.databricks.sql.transaction.tahoe.commands.DeltaCommand +import com.databricks.sql.transaction.tahoe.rapids.MergeExecutor.{toDeletionVector, totalBytesAndDistinctPartitionValues, FILE_PATH_COL, INCR_METRICS_COL, INCR_METRICS_FIELD, ROW_DROPPED_COL, ROW_DROPPED_FIELD, SOURCE_ROW_PRESENT_COL, SOURCE_ROW_PRESENT_FIELD, TARGET_ROW_PRESENT_COL, TARGET_ROW_PRESENT_FIELD} +import com.databricks.sql.transaction.tahoe.schema.ImplicitMetadataOperation +import com.databricks.sql.transaction.tahoe.sources.DeltaSQLConf +import com.databricks.sql.transaction.tahoe.util.{AnalysisHelper, DeltaFileOperations} +import com.nvidia.spark.rapids.{GpuOverrides, RapidsConf, SparkPlanMeta} +import com.nvidia.spark.rapids.RapidsConf.DELTA_LOW_SHUFFLE_MERGE_DEL_VECTOR_BROADCAST_THRESHOLD +import com.nvidia.spark.rapids.delta._ +import com.nvidia.spark.rapids.delta.GpuDeltaParquetFileFormatUtils.{METADATA_ROW_DEL_COL, METADATA_ROW_DEL_FIELD, METADATA_ROW_IDX_COL, METADATA_ROW_IDX_FIELD} +import com.nvidia.spark.rapids.shims.FileSourceScanExecMeta +import org.roaringbitmap.longlong.Roaring64Bitmap + +import org.apache.spark.SparkContext +import org.apache.spark.internal.Logging +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.expressions.{Alias, And, Attribute, AttributeReference, CaseWhen, Expression, Literal, NamedExpression, PredicateHelper} +import 
org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral
+import org.apache.spark.sql.catalyst.plans.logical.{DeltaMergeAction, DeltaMergeIntoClause, DeltaMergeIntoMatchedClause, DeltaMergeIntoMatchedDeleteClause, DeltaMergeIntoMatchedUpdateClause, DeltaMergeIntoNotMatchedBySourceClause, DeltaMergeIntoNotMatchedBySourceDeleteClause, DeltaMergeIntoNotMatchedBySourceUpdateClause, DeltaMergeIntoNotMatchedClause, DeltaMergeIntoNotMatchedInsertClause, LogicalPlan, Project}
+import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
+import org.apache.spark.sql.execution.{SparkPlan, SQLExecution}
+import org.apache.spark.sql.execution.command.LeafRunnableCommand
+import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation}
+import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics}
+import org.apache.spark.sql.functions._
+import org.apache.spark.sql.types.{BooleanType, LongType, StringType, StructField, StructType}
+
+/**
+ * GPU version of Delta Lake's low shuffle merge implementation.
+ *
+ * Performs a merge of a source query/table into a Delta table.
+ *
+ * Issues an error message when the ON search_condition of the MERGE statement can match
+ * a single row from the target table with multiple rows of the source table-reference.
+ * Unlike the original implementation, it optimizes the write of touched but unmodified
+ * target files.
+ *
+ * Algorithm:
+ *
+ * Phase 1: Find the input files in target that are touched by the rows that satisfy
+ *    the condition and verify that no two source rows match with the same target row.
+ *    This is implemented as an inner-join using the given condition. See [[findTouchedFiles]]
+ *    for more details.
+ *
+ * Phase 2: Read the touched files again and write new files with updated and/or inserted rows
+ * without copying unmodified rows.
+ *
+ * Phase 3: Read the touched files again and write new files with unmodified rows in the target
+ * table, trying to keep their original order and avoid shuffles as much as possible.
+ *
+ * Phase 4: Use the Delta protocol to atomically remove the touched files and add the new files.
+ *
+ * @param source Source data to merge from
+ * @param target Target table to merge into
+ * @param gpuDeltaLog Delta log to use
+ * @param condition Condition for a source row to match with a target row
+ * @param matchedClauses All info related to matched clauses.
+ * @param notMatchedClauses All info related to not matched clauses.
+ * @param notMatchedBySourceClauses All info related to not matched by source clauses.
+ * @param migratedSchema The final schema of the target - may be changed by schema evolution.
+ */ +case class GpuLowShuffleMergeCommand( + @transient source: LogicalPlan, + @transient target: LogicalPlan, + @transient gpuDeltaLog: GpuDeltaLog, + condition: Expression, + matchedClauses: Seq[DeltaMergeIntoMatchedClause], + notMatchedClauses: Seq[DeltaMergeIntoNotMatchedClause], + notMatchedBySourceClauses: Seq[DeltaMergeIntoNotMatchedBySourceClause], + migratedSchema: Option[StructType])( + @transient val rapidsConf: RapidsConf) + extends LeafRunnableCommand + with DeltaCommand with PredicateHelper with AnalysisHelper with ImplicitMetadataOperation { + + import SQLMetrics._ + + override val otherCopyArgs: Seq[AnyRef] = Seq(rapidsConf) + + override val canMergeSchema: Boolean = conf.getConf(DeltaSQLConf.DELTA_SCHEMA_AUTO_MIGRATE) + override val canOverwriteSchema: Boolean = false + + override val output: Seq[Attribute] = Seq( + AttributeReference("num_affected_rows", LongType)(), + AttributeReference("num_updated_rows", LongType)(), + AttributeReference("num_deleted_rows", LongType)(), + AttributeReference("num_inserted_rows", LongType)()) + + @transient private lazy val sc: SparkContext = SparkContext.getOrCreate() + @transient lazy val targetDeltaLog: DeltaLog = gpuDeltaLog.deltaLog + + override lazy val metrics = Map[String, SQLMetric]( + "numSourceRows" -> createMetric(sc, "number of source rows"), + "numSourceRowsInSecondScan" -> + createMetric(sc, "number of source rows (during repeated scan)"), + "numTargetRowsCopied" -> createMetric(sc, "number of target rows rewritten unmodified"), + "numTargetRowsInserted" -> createMetric(sc, "number of inserted rows"), + "numTargetRowsUpdated" -> createMetric(sc, "number of updated rows"), + "numTargetRowsDeleted" -> createMetric(sc, "number of deleted rows"), + "numTargetRowsMatchedUpdated" -> createMetric(sc, "number of target rows updated when matched"), + "numTargetRowsMatchedDeleted" -> createMetric(sc, "number of target rows deleted when matched"), + "numTargetRowsNotMatchedBySourceUpdated" -> createMetric(sc, + "number of target rows updated when not matched by source"), + "numTargetRowsNotMatchedBySourceDeleted" -> createMetric(sc, + "number of target rows deleted when not matched by source"), + "numTargetFilesBeforeSkipping" -> createMetric(sc, "number of target files before skipping"), + "numTargetFilesAfterSkipping" -> createMetric(sc, "number of target files after skipping"), + "numTargetFilesRemoved" -> createMetric(sc, "number of files removed to target"), + "numTargetFilesAdded" -> createMetric(sc, "number of files added to target"), + "numTargetChangeFilesAdded" -> + createMetric(sc, "number of change data capture files generated"), + "numTargetChangeFileBytes" -> + createMetric(sc, "total size of change data capture files generated"), + "numTargetBytesBeforeSkipping" -> createMetric(sc, "number of target bytes before skipping"), + "numTargetBytesAfterSkipping" -> createMetric(sc, "number of target bytes after skipping"), + "numTargetBytesRemoved" -> createMetric(sc, "number of target bytes removed"), + "numTargetBytesAdded" -> createMetric(sc, "number of target bytes added"), + "numTargetPartitionsAfterSkipping" -> + createMetric(sc, "number of target partitions after skipping"), + "numTargetPartitionsRemovedFrom" -> + createMetric(sc, "number of target partitions from which files were removed"), + "numTargetPartitionsAddedTo" -> + createMetric(sc, "number of target partitions to which files were added"), + "executionTimeMs" -> + createMetric(sc, "time taken to execute the entire operation"), + "scanTimeMs" -> + 
createMetric(sc, "time taken to scan the files for matches"), + "rewriteTimeMs" -> + createMetric(sc, "time taken to rewrite the matched files")) + + /** Whether this merge statement has only a single insert (NOT MATCHED) clause. */ + protected def isSingleInsertOnly: Boolean = matchedClauses.isEmpty && + notMatchedClauses.length == 1 + + override def run(spark: SparkSession): Seq[Row] = { + recordDeltaOperation(targetDeltaLog, "delta.dml.lowshufflemerge") { + val startTime = System.nanoTime() + val result = gpuDeltaLog.withNewTransaction { deltaTxn => + if (target.schema.size != deltaTxn.metadata.schema.size) { + throw DeltaErrors.schemaChangedSinceAnalysis( + atAnalysis = target.schema, latestSchema = deltaTxn.metadata.schema) + } + + if (canMergeSchema) { + updateMetadata( + spark, deltaTxn, migratedSchema.getOrElse(target.schema), + deltaTxn.metadata.partitionColumns, deltaTxn.metadata.configuration, + isOverwriteMode = false, rearrangeOnly = false) + } + + + val (executor, fallback) = { + val context = MergeExecutorContext(this, spark, deltaTxn, rapidsConf) + if (isSingleInsertOnly && spark.conf.get(DeltaSQLConf.MERGE_INSERT_ONLY_ENABLED)) { + (new InsertOnlyMergeExecutor(context), false) + } else { + val executor = new LowShuffleMergeExecutor(context) + (executor, executor.shouldFallback()) + } + } + + if (fallback) { + None + } else { + Some(runLowShuffleMerge(spark, startTime, deltaTxn, executor)) + } + } + + result match { + case Some(row) => row + case None => + // We should rollback to normal gpu + new GpuMergeIntoCommand(source, target, gpuDeltaLog, condition, matchedClauses, + notMatchedClauses, notMatchedBySourceClauses, migratedSchema)(rapidsConf) + .run(spark) + } + } + } + + + private def runLowShuffleMerge( + spark: SparkSession, + startTime: Long, + deltaTxn: GpuOptimisticTransactionBase, + mergeExecutor: MergeExecutor): Seq[Row] = { + val deltaActions = mergeExecutor.execute() + // Metrics should be recorded before commit (where they are written to delta logs). + metrics("executionTimeMs").set((System.nanoTime() - startTime) / 1000 / 1000) + deltaTxn.registerSQLMetrics(spark, metrics) + + // This is a best-effort sanity check. + if (metrics("numSourceRowsInSecondScan").value >= 0 && + metrics("numSourceRows").value != metrics("numSourceRowsInSecondScan").value) { + log.warn(s"Merge source has ${metrics("numSourceRows").value} rows in initial scan but " + + s"${metrics("numSourceRowsInSecondScan").value} rows in second scan") + if (conf.getConf(DeltaSQLConf.MERGE_FAIL_IF_SOURCE_CHANGED)) { + throw DeltaErrors.sourceNotDeterministicInMergeException(spark) + } + } + + deltaTxn.commit( + deltaActions, + DeltaOperations.Merge( + Option(condition), + matchedClauses.map(DeltaOperations.MergePredicate(_)), + notMatchedClauses.map(DeltaOperations.MergePredicate(_)), + // We do not support notMatchedBySourcePredicates yet and fall back to CPU + // See https://github.com/NVIDIA/spark-rapids/issues/8415 + notMatchedBySourcePredicates = Seq.empty[MergePredicate] + )) + + // Record metrics + val stats = GpuMergeStats.fromMergeSQLMetrics( + metrics, + condition, + matchedClauses, + notMatchedClauses, + deltaTxn.metadata.partitionColumns.nonEmpty) + recordDeltaEvent(targetDeltaLog, "delta.dml.merge.stats", data = stats) + + + spark.sharedState.cacheManager.recacheByPlan(spark, target) + + // This is needed to make the SQL metrics visible in the Spark UI. Also this needs + // to be outside the recordMergeOperation because this method will update some metric. 
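Editorial sketch (not part of this change): the command ultimately reports a single result row in which num_affected_rows is simply the sum of the update, delete and insert counters, as assembled a few lines below. The bookkeeping in isolation, using a hypothetical MergeCounters class:

object MergeResultRowSketch {
  final case class MergeCounters(updated: Long, deleted: Long, inserted: Long) {
    def numAffectedRows: Long = updated + deleted + inserted
  }

  def main(args: Array[String]): Unit = {
    val c = MergeCounters(updated = 10L, deleted = 3L, inserted = 5L)
    // Same shape as the returned row: (num_affected, num_updated, num_deleted, num_inserted).
    println((c.numAffectedRows, c.updated, c.deleted, c.inserted)) // (18,10,3,5)
  }
}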
+ val executionId = spark.sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY) + SQLMetrics.postDriverMetricUpdates(spark.sparkContext, executionId, metrics.values.toSeq) + Seq(Row(metrics("numTargetRowsUpdated").value + metrics("numTargetRowsDeleted").value + + metrics("numTargetRowsInserted").value, metrics("numTargetRowsUpdated").value, + metrics("numTargetRowsDeleted").value, metrics("numTargetRowsInserted").value)) + } + + /** + * Execute the given `thunk` and return its result while recording the time taken to do it. + * + * @param sqlMetricName name of SQL metric to update with the time taken by the thunk + * @param thunk the code to execute + */ + def recordMergeOperation[A](sqlMetricName: String)(thunk: => A): A = { + val startTimeNs = System.nanoTime() + val r = thunk + val timeTakenMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTimeNs) + if (sqlMetricName != null && timeTakenMs > 0) { + metrics(sqlMetricName) += timeTakenMs + } + r + } + + /** Expressions to increment SQL metrics */ + def makeMetricUpdateUDF(name: String, deterministic: Boolean = false) + : Expression = { + // only capture the needed metric in a local variable + val metric = metrics(name) + var u = DeltaUDF.boolean(new GpuDeltaMetricUpdateUDF(metric)) + if (!deterministic) { + u = u.asNondeterministic() + } + u.apply().expr + } +} + +/** + * Context merge execution. + */ +case class MergeExecutorContext(cmd: GpuLowShuffleMergeCommand, + spark: SparkSession, + deltaTxn: OptimisticTransaction, + rapidsConf: RapidsConf) + +trait MergeExecutor extends AnalysisHelper with PredicateHelper with Logging { + + val context: MergeExecutorContext + + + /** + * Map to get target output attributes by name. + * The case sensitivity of the map is set accordingly to Spark configuration. + */ + @transient private lazy val targetOutputAttributesMap: Map[String, Attribute] = { + val attrMap: Map[String, Attribute] = context.cmd.target + .outputSet.view + .map(attr => attr.name -> attr).toMap + if (context.cmd.conf.caseSensitiveAnalysis) { + attrMap + } else { + CaseInsensitiveMap(attrMap) + } + } + + def execute(): Seq[FileAction] + + protected def targetOutputCols: Seq[NamedExpression] = { + context.deltaTxn.metadata.schema.map { col => + targetOutputAttributesMap + .get(col.name) + .map { a => + AttributeReference(col.name, col.dataType, col.nullable)(a.exprId) + } + .getOrElse(Alias(Literal(null), col.name)()) + } + } + + /** + * Build a DataFrame using the given `files` that has the same output columns (exprIds) + * as the `target` logical plan, so that existing update/insert expressions can be applied + * on this new plan. + */ + protected def buildTargetDFWithFiles(files: Seq[AddFile]): DataFrame = { + val targetOutputColsMap = { + val colsMap: Map[String, NamedExpression] = targetOutputCols.view + .map(col => col.name -> col).toMap + if (context.cmd.conf.caseSensitiveAnalysis) { + colsMap + } else { + CaseInsensitiveMap(colsMap) + } + } + + val plan = { + // We have to do surgery to use the attributes from `targetOutputCols` to scan the table. + // In cases of schema evolution, they may not be the same type as the original attributes. + val original = + context.deltaTxn.deltaLog.createDataFrame(context.deltaTxn.snapshot, files) + .queryExecution + .analyzed + val transformed = original.transform { + case LogicalRelation(base, _, catalogTbl, isStreaming) => + LogicalRelation( + base, + // We can ignore the new columns which aren't yet AttributeReferences. 
+ targetOutputCols.collect { case a: AttributeReference => a }, + catalogTbl, + isStreaming) + } + + // In case of schema evolution & column mapping, we would also need to rebuild the file + // format because under column mapping, the reference schema within DeltaParquetFileFormat + // that is used to populate metadata needs to be updated + if (context.deltaTxn.metadata.columnMappingMode != NoMapping) { + val updatedFileFormat = context.deltaTxn.deltaLog.fileFormat( + context.deltaTxn.deltaLog.unsafeVolatileSnapshot.protocol, context.deltaTxn.metadata) + DeltaTableUtils.replaceFileFormat(transformed, updatedFileFormat) + } else { + transformed + } + } + + // For each plan output column, find the corresponding target output column (by name) and + // create an alias + val aliases = plan.output.map { + case newAttrib: AttributeReference => + val existingTargetAttrib = targetOutputColsMap.getOrElse(newAttrib.name, + throw new AnalysisException( + s"Could not find ${newAttrib.name} among the existing target output " + + targetOutputCols.mkString(","))).asInstanceOf[AttributeReference] + + if (existingTargetAttrib.exprId == newAttrib.exprId) { + // It's not valid to alias an expression to its own exprId (this is considered a + // non-unique exprId by the analyzer), so we just use the attribute directly. + newAttrib + } else { + Alias(newAttrib, existingTargetAttrib.name)(exprId = existingTargetAttrib.exprId) + } + } + + Dataset.ofRows(context.spark, Project(aliases, plan)) + } + + + /** + * Repartitions the output DataFrame by the partition columns if table is partitioned + * and `merge.repartitionBeforeWrite.enabled` is set to true. + */ + protected def repartitionIfNeeded(df: DataFrame): DataFrame = { + val partitionColumns = context.deltaTxn.metadata.partitionColumns + // TODO: We should remove this method and use optimized write instead, see + // https://github.com/NVIDIA/spark-rapids/issues/10417 + if (partitionColumns.nonEmpty && context.spark.conf.get(DeltaSQLConf + .MERGE_REPARTITION_BEFORE_WRITE)) { + df.repartition(partitionColumns.map(col): _*) + } else { + df + } + } + + protected def sourceDF: DataFrame = { + // UDF to increment metrics + val incrSourceRowCountExpr = context.cmd.makeMetricUpdateUDF("numSourceRows") + Dataset.ofRows(context.spark, context.cmd.source) + .filter(new Column(incrSourceRowCountExpr)) + } + + /** Whether this merge statement has no insert (NOT MATCHED) clause. */ + protected def hasNoInserts: Boolean = context.cmd.notMatchedClauses.isEmpty + + +} + +/** + * This is an optimization of the case when there is no update clause for the merge. + * We perform an left anti join on the source data to find the rows to be inserted. + * + * This will currently only optimize for the case when there is a _single_ notMatchedClause. 
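Editorial sketch (not part of this change): an insert-only merge reduces to a left anti join, because the source rows that find no match in the target under the merge condition are exactly the rows to insert. The self-contained snippet below shows that join on toy DataFrames; the column names and join key are hypothetical.

import org.apache.spark.sql.SparkSession

object InsertOnlyMergeSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("insert-only-merge").getOrCreate()
    import spark.implicits._

    val target = Seq((1, "a"), (2, "b")).toDF("id", "value")
    val source = Seq((2, "b-updated"), (3, "c")).toDF("id", "value")

    // Source rows with no match in the target are exactly the rows to insert.
    val toInsert = source.join(target, Seq("id"), "left_anti")
    toInsert.show() // only (3, "c"); (2, "b-updated") matched an existing target row and is dropped
    spark.stop()
  }
}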
+ */ +class InsertOnlyMergeExecutor(override val context: MergeExecutorContext) extends MergeExecutor { + override def execute(): Seq[FileAction] = { + context.cmd.recordMergeOperation(sqlMetricName = "rewriteTimeMs") { + + // UDFs to update metrics + val incrSourceRowCountExpr = context.cmd.makeMetricUpdateUDF("numSourceRows") + val incrInsertedCountExpr = context.cmd.makeMetricUpdateUDF("numTargetRowsInserted") + + val outputColNames = targetOutputCols.map(_.name) + // we use head here since we know there is only a single notMatchedClause + val outputExprs = context.cmd.notMatchedClauses.head.resolvedActions.map(_.expr) + val outputCols = outputExprs.zip(outputColNames).map { case (expr, name) => + new Column(Alias(expr, name)()) + } + + // source DataFrame + val sourceDF = Dataset.ofRows(context.spark, context.cmd.source) + .filter(new Column(incrSourceRowCountExpr)) + .filter(new Column(context.cmd.notMatchedClauses.head.condition + .getOrElse(Literal.TrueLiteral))) + + // Skip data based on the merge condition + val conjunctivePredicates = splitConjunctivePredicates(context.cmd.condition) + val targetOnlyPredicates = + conjunctivePredicates.filter(_.references.subsetOf(context.cmd.target.outputSet)) + val dataSkippedFiles = context.deltaTxn.filterFiles(targetOnlyPredicates) + + // target DataFrame + val targetDF = buildTargetDFWithFiles(dataSkippedFiles) + + val insertDf = sourceDF.join(targetDF, new Column(context.cmd.condition), "leftanti") + .select(outputCols: _*) + .filter(new Column(incrInsertedCountExpr)) + + val newFiles = context.deltaTxn + .writeFiles(repartitionIfNeeded(insertDf, + )) + + // Update metrics + context.cmd.metrics("numTargetFilesBeforeSkipping") += context.deltaTxn.snapshot.numOfFiles + context.cmd.metrics("numTargetBytesBeforeSkipping") += context.deltaTxn.snapshot.sizeInBytes + val (afterSkippingBytes, afterSkippingPartitions) = + totalBytesAndDistinctPartitionValues(dataSkippedFiles) + context.cmd.metrics("numTargetFilesAfterSkipping") += dataSkippedFiles.size + context.cmd.metrics("numTargetBytesAfterSkipping") += afterSkippingBytes + context.cmd.metrics("numTargetPartitionsAfterSkipping") += afterSkippingPartitions + context.cmd.metrics("numTargetFilesRemoved") += 0 + context.cmd.metrics("numTargetBytesRemoved") += 0 + context.cmd.metrics("numTargetPartitionsRemovedFrom") += 0 + val (addedBytes, addedPartitions) = totalBytesAndDistinctPartitionValues(newFiles) + context.cmd.metrics("numTargetFilesAdded") += newFiles.count(_.isInstanceOf[AddFile]) + context.cmd.metrics("numTargetBytesAdded") += addedBytes + context.cmd.metrics("numTargetPartitionsAddedTo") += addedPartitions + newFiles + } + } +} + + +/** + * This is an optimized algorithm for merge statement, where we avoid shuffling the unmodified + * target data. + * + * The algorithm is as follows: + * 1. Find touched target files in the target table by joining the source and target data, with + * collecting joined row identifiers as (`__metadata_file_path`, `__metadata_row_idx`) pairs. + * 2. Read the touched files again and write new files with updated and/or inserted rows + * without coping unmodified data from target table, but filtering target table with collected + * rows mentioned above. + * 3. Read the touched files again, filtering unmodified rows with collected row identifiers + * collected in first step, and saving them without shuffle. 
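Editorial sketch (not part of this change): the low shuffle executor needs, for every touched file, the set of row positions that the source join matched, and then routes each target row either through the merge-clause path or the copy-unmodified path. The snippet below models that routing with a Roaring64Bitmap per file (the same bitmap type the command imports); the file name and row indexes are hypothetical.

import org.roaringbitmap.longlong.Roaring64Bitmap

object LowShuffleRoutingSketch {
  def main(args: Array[String]): Unit = {
    // Row positions touched by the source join, per target file (hypothetical values).
    val touched: Map[String, Roaring64Bitmap] = {
      val bm = new Roaring64Bitmap()
      bm.addLong(3L)
      bm.addLong(17L)
      Map("part-00000.parquet" -> bm)
    }

    def isModified(file: String, rowIdx: Long): Boolean =
      touched.get(file).exists(_.contains(rowIdx))

    // Rows read back from touched files: matched rows get the merge clauses applied,
    // the rest are copied unmodified (and, ideally, without a shuffle).
    val rows = Seq(("part-00000.parquet", 3L), ("part-00000.parquet", 4L))
    rows.foreach { case (file, idx) =>
      val route = if (isModified(file, idx)) "apply merge clauses" else "copy unmodified"
      println(s"$file row $idx: $route")
    }
  }
}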
+ */ +class LowShuffleMergeExecutor(override val context: MergeExecutorContext) extends MergeExecutor { + + // We over-count numTargetRowsDeleted when there are multiple matches; + // this is the amount of the overcount, so we can subtract it to get a correct final metric. + private var multipleMatchDeleteOnlyOvercount: Option[Long] = None + + // UDFs to update metrics + private val incrSourceRowCountExpr: Expression = context.cmd. + makeMetricUpdateUDF("numSourceRowsInSecondScan") + private val incrUpdatedCountExpr: Expression = context.cmd + .makeMetricUpdateUDF("numTargetRowsUpdated") + private val incrUpdatedMatchedCountExpr: Expression = context.cmd + .makeMetricUpdateUDF("numTargetRowsMatchedUpdated") + private val incrUpdatedNotMatchedBySourceCountExpr: Expression = context.cmd + .makeMetricUpdateUDF("numTargetRowsNotMatchedBySourceUpdated") + private val incrInsertedCountExpr: Expression = context.cmd + .makeMetricUpdateUDF("numTargetRowsInserted") + private val incrDeletedCountExpr: Expression = context.cmd + .makeMetricUpdateUDF("numTargetRowsDeleted") + private val incrDeletedMatchedCountExpr: Expression = context.cmd + .makeMetricUpdateUDF("numTargetRowsMatchedDeleted") + private val incrDeletedNotMatchedBySourceCountExpr: Expression = context.cmd + .makeMetricUpdateUDF("numTargetRowsNotMatchedBySourceDeleted") + + private def updateOutput(resolvedActions: Seq[DeltaMergeAction], incrExpr: Expression) + : Seq[Expression] = { + resolvedActions.map(_.expr) :+ + Literal.FalseLiteral :+ + UnresolvedAttribute(TARGET_ROW_PRESENT_COL) :+ + UnresolvedAttribute(SOURCE_ROW_PRESENT_COL) :+ + incrExpr + } + + private def deleteOutput(incrExpr: Expression): Seq[Expression] = { + targetOutputCols :+ + TrueLiteral :+ + UnresolvedAttribute(TARGET_ROW_PRESENT_COL) :+ + UnresolvedAttribute(SOURCE_ROW_PRESENT_COL) :+ + incrExpr + } + + private def insertOutput(resolvedActions: Seq[DeltaMergeAction], incrExpr: Expression) + : Seq[Expression] = { + resolvedActions.map(_.expr) :+ + Literal.FalseLiteral :+ + UnresolvedAttribute(TARGET_ROW_PRESENT_COL) :+ + UnresolvedAttribute(SOURCE_ROW_PRESENT_COL) :+ + incrExpr + } + + private def clauseOutput(clause: DeltaMergeIntoClause): Seq[Expression] = clause match { + case u: DeltaMergeIntoMatchedUpdateClause => + updateOutput(u.resolvedActions, And(incrUpdatedCountExpr, incrUpdatedMatchedCountExpr)) + case _: DeltaMergeIntoMatchedDeleteClause => + deleteOutput(And(incrDeletedCountExpr, incrDeletedMatchedCountExpr)) + case i: DeltaMergeIntoNotMatchedInsertClause => + insertOutput(i.resolvedActions, incrInsertedCountExpr) + case u: DeltaMergeIntoNotMatchedBySourceUpdateClause => + updateOutput(u.resolvedActions, + And(incrUpdatedCountExpr, incrUpdatedNotMatchedBySourceCountExpr)) + case _: DeltaMergeIntoNotMatchedBySourceDeleteClause => + deleteOutput(And(incrDeletedCountExpr, incrDeletedNotMatchedBySourceCountExpr)) + } + + private def clauseCondition(clause: DeltaMergeIntoClause): Expression = { + // if condition is None, then expression always evaluates to true + clause.condition.getOrElse(TrueLiteral) + } + + /** + * Though low shuffle merge algorithm performs better than traditional merge algorithm in some + * cases, there are some case we should fallback to traditional merge executor: + * + * 1. Low shuffle merge algorithm requires generating metadata columns such as + * [[METADATA_ROW_IDX_COL]], [[METADATA_ROW_DEL_COL]], which only implemented on + * [[org.apache.spark.sql.rapids.GpuFileSourceScanExec]]. 
That means we need to fallback to + * this normal executor when [[org.apache.spark.sql.rapids.GpuFileSourceScanExec]] is disabled + * for some reason. + * 2. Low shuffle merge algorithm currently needs to broadcast deletion vector, which may + * introduce extra overhead. It maybe better to fallback to this algorithm when the changeset + * it too large. + */ + def shouldFallback(): Boolean = { + // Trying to detect if we can execute finding touched files. + val touchFilePlanOverrideSucceed = verifyGpuPlan(planForFindingTouchedFiles()) { planMeta => + def check(meta: SparkPlanMeta[SparkPlan]): Boolean = { + meta match { + case scan if scan.isInstanceOf[FileSourceScanExecMeta] => scan + .asInstanceOf[FileSourceScanExecMeta] + .wrapped + .schema + .fieldNames + .contains(METADATA_ROW_IDX_COL) && scan.canThisBeReplaced + case m => m.childPlans.exists(check) + } + } + + check(planMeta) + } + if (!touchFilePlanOverrideSucceed) { + logWarning("Unable to override file scan for low shuffle merge for finding touched files " + + "plan, fallback to tradition merge.") + return true + } + + // Trying to detect if we can execute the merge plan. + val mergePlanOverrideSucceed = verifyGpuPlan(planForMergeExecution(touchedFiles)) { planMeta => + var overrideCount = 0 + def count(meta: SparkPlanMeta[SparkPlan]): Unit = { + meta match { + case scan if scan.isInstanceOf[FileSourceScanExecMeta] => + if (scan.asInstanceOf[FileSourceScanExecMeta] + .wrapped.schema.fieldNames.contains(METADATA_ROW_DEL_COL) && scan.canThisBeReplaced) { + overrideCount += 1 + } + case m => m.childPlans.foreach(count) + } + } + + count(planMeta) + overrideCount == 2 + } + + if (!mergePlanOverrideSucceed) { + logWarning("Unable to override file scan for low shuffle merge for merge plan, fallback to " + + "tradition merge.") + return true + } + + val deletionVectorSize = touchedFiles.values.map(_._1.serializedSizeInBytes()).sum + val maxDelVectorSize = context.rapidsConf + .get(DELTA_LOW_SHUFFLE_MERGE_DEL_VECTOR_BROADCAST_THRESHOLD) + if (deletionVectorSize > maxDelVectorSize) { + logWarning( + s"""Low shuffle merge can't be executed because broadcast deletion vector count + |$deletionVectorSize is large than max value $maxDelVectorSize """.stripMargin) + return true + } + + false + } + + private def verifyGpuPlan(input: DataFrame)(checkPlanMeta: SparkPlanMeta[SparkPlan] => Boolean) + : Boolean = { + val overridePlan = GpuOverrides.wrapAndTagPlan(input.queryExecution.sparkPlan, + context.rapidsConf) + checkPlanMeta(overridePlan) + } + + override def execute(): Seq[FileAction] = { + val newFiles = context.cmd.withStatusCode("DELTA", + s"Rewriting ${touchedFiles.size} files and saving modified data") { + val df = planForMergeExecution(touchedFiles) + context.deltaTxn.writeFiles(df) + } + + // Update metrics + val (addedBytes, addedPartitions) = totalBytesAndDistinctPartitionValues(newFiles) + context.cmd.metrics("numTargetFilesAdded") += newFiles.count(_.isInstanceOf[AddFile]) + context.cmd.metrics("numTargetChangeFilesAdded") += newFiles.count(_.isInstanceOf[AddCDCFile]) + context.cmd.metrics("numTargetChangeFileBytes") += newFiles.collect { + case f: AddCDCFile => f.size + } + .sum + context.cmd.metrics("numTargetBytesAdded") += addedBytes + context.cmd.metrics("numTargetPartitionsAddedTo") += addedPartitions + + if (multipleMatchDeleteOnlyOvercount.isDefined) { + // Compensate for counting duplicates during the query. 
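Editorial sketch (not part of this change): when the only matched clause is an unconditional delete, a target row that matches several source rows is counted once per match, so the recorded overcount is subtracted from numTargetRowsDeleted afterwards, as done just below. The arithmetic in isolation, with hypothetical names:

object DeleteOvercountSketch {
  def correctedDeletes(countedDeletes: Long, multipleMatchOvercount: Option[Long]): Long =
    multipleMatchOvercount match {
      case Some(over) =>
        val actual = countedDeletes - over
        require(actual >= 0, "overcount cannot exceed the counted deletes")
        actual
      case None => countedDeletes
    }

  def main(args: Array[String]): Unit =
    println(correctedDeletes(countedDeletes = 7L, multipleMatchOvercount = Some(2L))) // prints 5
}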
+ val actualRowsDeleted = + context.cmd.metrics("numTargetRowsDeleted").value - multipleMatchDeleteOnlyOvercount.get + assert(actualRowsDeleted >= 0) + context.cmd.metrics("numTargetRowsDeleted").set(actualRowsDeleted) + } + + touchedFiles.values.map(_._2).map(_.remove).toSeq ++ newFiles + } + + private lazy val dataSkippedFiles: Seq[AddFile] = { + // Skip data based on the merge condition + val targetOnlyPredicates = splitConjunctivePredicates(context.cmd.condition) + .filter(_.references.subsetOf(context.cmd.target.outputSet)) + context.deltaTxn.filterFiles(targetOnlyPredicates) + } + + private lazy val dataSkippedTargetDF: DataFrame = { + addRowIndexMetaColumn(buildTargetDFWithFiles(dataSkippedFiles)) + } + + private lazy val touchedFiles: Map[String, (Roaring64Bitmap, AddFile)] = this.findTouchedFiles() + + private def planForFindingTouchedFiles(): DataFrame = { + + // Apply inner join to between source and target using the merge condition to find matches + // In addition, we attach two columns + // - METADATA_ROW_IDX column to identify target row in file + // - FILE_PATH_COL the target file name the row is from to later identify the files touched + // by matched rows + val targetDF = dataSkippedTargetDF.withColumn(FILE_PATH_COL, input_file_name()) + + sourceDF.join(targetDF, new Column(context.cmd.condition), "inner") + } + + private def planForMergeExecution(touchedFiles: Map[String, (Roaring64Bitmap, AddFile)]) + : DataFrame = { + getModifiedDF(touchedFiles).unionAll(getUnmodifiedDF(touchedFiles)) + } + + /** + * Find the target table files that contain the rows that satisfy the merge condition. This is + * implemented as an inner-join between the source query/table and the target table using + * the merge condition. + */ + private def findTouchedFiles(): Map[String, (Roaring64Bitmap, AddFile)] = + context.cmd.recordMergeOperation(sqlMetricName = "scanTimeMs") { + context.spark.udf.register("row_index_set", udaf(RoaringBitmapUDAF)) + // Process the matches from the inner join to record touched files and find multiple matches + val collectTouchedFiles = planForFindingTouchedFiles() + .select(col(FILE_PATH_COL), col(METADATA_ROW_IDX_COL)) + .groupBy(FILE_PATH_COL) + .agg( + expr(s"row_index_set($METADATA_ROW_IDX_COL) as row_idxes"), + count("*").as("count")) + .collect().map(row => { + val filename = row.getAs[String](FILE_PATH_COL) + val rowIdxSet = row.getAs[RoaringBitmapWrapper]("row_idxes").inner + val count = row.getAs[Long]("count") + (filename, (rowIdxSet, count)) + }) + .toMap + + val duplicateCount = { + val distinctMatchedRowCounts = collectTouchedFiles.values + .map(_._1.getLongCardinality).sum + val allMatchedRowCounts = collectTouchedFiles.values.map(_._2).sum + allMatchedRowCounts - distinctMatchedRowCounts + } + + val hasMultipleMatches = duplicateCount > 0 + + // Throw error if multiple matches are ambiguous or cannot be computed correctly. + val canBeComputedUnambiguously = { + // Multiple matches are not ambiguous when there is only one unconditional delete as + // all the matched row pairs in the 2nd join in `writeAllChanges` will get deleted. 
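Editorial sketch (not part of this change): duplicates are detected by comparing the total number of join matches against the number of distinct matched target rows, and they are only tolerated when the single matched clause is an unconditional delete. A plain-Scala rendering of both rules, with hypothetical names:

object MultipleMatchCheckSketch {
  // Per touched file: how many distinct target rows matched vs. how many join matches occurred.
  final case class FileMatches(distinctRows: Long, totalMatches: Long)

  def duplicateCount(files: Iterable[FileMatches]): Long =
    files.map(_.totalMatches).sum - files.map(_.distinctRows).sum

  def ambiguous(files: Iterable[FileMatches], singleUnconditionalDelete: Boolean): Boolean =
    duplicateCount(files) > 0 && !singleUnconditionalDelete

  def main(args: Array[String]): Unit = {
    val files = Seq(FileMatches(distinctRows = 10, totalMatches = 12))
    println(duplicateCount(files))                               // 2
    println(ambiguous(files, singleUnconditionalDelete = true))  // false: allowed
    println(ambiguous(files, singleUnconditionalDelete = false)) // true: must raise an error
  }
}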
+ val isUnconditionalDelete = context.cmd.matchedClauses.headOption match { + case Some(DeltaMergeIntoMatchedDeleteClause(None)) => true + case _ => false + } + context.cmd.matchedClauses.size == 1 && isUnconditionalDelete + } + + if (hasMultipleMatches && !canBeComputedUnambiguously) { + throw DeltaErrors.multipleSourceRowMatchingTargetRowInMergeException(context.spark) + } + + if (hasMultipleMatches) { + // This is only allowed for delete-only queries. + // This query will count the duplicates for numTargetRowsDeleted in Job 2, + // because we count matches after the join and not just the target rows. + // We have to compensate for this by subtracting the duplicates later, + // so we need to record them here. + multipleMatchDeleteOnlyOvercount = Some(duplicateCount) + } + + // Get the AddFiles using the touched file names. + val touchedFileNames = collectTouchedFiles.keys.toSeq + + val nameToAddFileMap = context.cmd.generateCandidateFileMap( + context.cmd.targetDeltaLog.dataPath, + dataSkippedFiles) + + val touchedAddFiles = touchedFileNames.map(f => + context.cmd.getTouchedFile(context.cmd.targetDeltaLog.dataPath, f, nameToAddFileMap)) + .map(f => (DeltaFileOperations + .absolutePath(context.cmd.targetDeltaLog.dataPath.toString, f.path) + .toString, f)).toMap + + // When the target table is empty, and the optimizer optimized away the join entirely + // numSourceRows will be incorrectly 0. + // We need to scan the source table once to get the correct + // metric here. + if (context.cmd.metrics("numSourceRows").value == 0 && + (dataSkippedFiles.isEmpty || dataSkippedTargetDF.take(1).isEmpty)) { + val numSourceRows = sourceDF.count() + context.cmd.metrics("numSourceRows").set(numSourceRows) + } + + // Update metrics + context.cmd.metrics("numTargetFilesBeforeSkipping") += context.deltaTxn.snapshot.numOfFiles + context.cmd.metrics("numTargetBytesBeforeSkipping") += context.deltaTxn.snapshot.sizeInBytes + val (afterSkippingBytes, afterSkippingPartitions) = + totalBytesAndDistinctPartitionValues(dataSkippedFiles) + context.cmd.metrics("numTargetFilesAfterSkipping") += dataSkippedFiles.size + context.cmd.metrics("numTargetBytesAfterSkipping") += afterSkippingBytes + context.cmd.metrics("numTargetPartitionsAfterSkipping") += afterSkippingPartitions + val (removedBytes, removedPartitions) = + totalBytesAndDistinctPartitionValues(touchedAddFiles.values.toSeq) + context.cmd.metrics("numTargetFilesRemoved") += touchedAddFiles.size + context.cmd.metrics("numTargetBytesRemoved") += removedBytes + context.cmd.metrics("numTargetPartitionsRemovedFrom") += removedPartitions + + collectTouchedFiles.map(kv => (kv._1, (kv._2._1, touchedAddFiles(kv._1)))) + } + + + /** + * Modify original data frame to insert + * [[GpuDeltaParquetFileFormatUtils.METADATA_ROW_IDX_COL]]. + */ + private def addRowIndexMetaColumn(baseDF: DataFrame): DataFrame = { + val rowIdxAttr = AttributeReference( + METADATA_ROW_IDX_COL, + METADATA_ROW_IDX_FIELD.dataType, + METADATA_ROW_IDX_FIELD.nullable)() + + val newPlan = baseDF.queryExecution.analyzed.transformUp { + case r@LogicalRelation(fs: HadoopFsRelation, _, _, _) => + val newSchema = StructType(fs.dataSchema.fields).add(METADATA_ROW_IDX_FIELD) + + // This is required to ensure that row index is correctly calculated. 
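+ // If the file were split across tasks or filters were pushed down into the Parquet reader,
+ // the reader could skip rows or start counting mid-file, so the generated row index would no
+ // longer match the row's physical position that the per-file row-index bitmaps record.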
+ val newFileFormat = fs.fileFormat.asInstanceOf[DeltaParquetFileFormat] + .copy(isSplittable = false, disablePushDowns = true) + + val newFs = fs.copy(dataSchema = newSchema, fileFormat = newFileFormat)(context.spark) + + val newOutput = r.output :+ rowIdxAttr + r.copy(relation = newFs, output = newOutput) + case p@Project(projectList, _) => + val newProjectList = projectList :+ rowIdxAttr + p.copy(projectList = newProjectList) + } + + Dataset.ofRows(context.spark, newPlan) + } + + /** + * The result is scanning target table with touched files, and added an extra + * [[METADATA_ROW_DEL_COL]] to indicate whether filtered by joining with source table in first + * step. + */ + private def getTouchedTargetDF(touchedFiles: Map[String, (Roaring64Bitmap, AddFile)]) + : DataFrame = { + // Generate a new target dataframe that has same output attributes exprIds as the target plan. + // This allows us to apply the existing resolved update/insert expressions. + val baseTargetDF = buildTargetDFWithFiles(touchedFiles.values.map(_._2).toSeq) + + val newPlan = { + val rowDelAttr = AttributeReference( + METADATA_ROW_DEL_COL, + METADATA_ROW_DEL_FIELD.dataType, + METADATA_ROW_DEL_FIELD.nullable)() + + baseTargetDF.queryExecution.analyzed.transformUp { + case r@LogicalRelation(fs: HadoopFsRelation, _, _, _) => + val newSchema = StructType(fs.dataSchema.fields).add(METADATA_ROW_DEL_FIELD) + + // This is required to ensure that row index is correctly calculated. + val newFileFormat = { + val oldFormat = fs.fileFormat.asInstanceOf[DeltaParquetFileFormat] + val dvs = touchedFiles.map(kv => (new URI(kv._1), + DeletionVectorDescriptorWithFilterType(toDeletionVector(kv._2._1), + RowIndexFilterType.UNKNOWN))) + val broadcastDVs = context.spark.sparkContext.broadcast(dvs) + + oldFormat.copy(isSplittable = false, + broadcastDvMap = Some(broadcastDVs), + disablePushDowns = true) + } + + val newFs = fs.copy(dataSchema = newSchema, fileFormat = newFileFormat)(context.spark) + + val newOutput = r.output :+ rowDelAttr + r.copy(relation = newFs, output = newOutput) + case p@Project(projectList, _) => + val newProjectList = projectList :+ rowDelAttr + p.copy(projectList = newProjectList) + } + } + + val df = Dataset.ofRows(context.spark, newPlan) + .withColumn(TARGET_ROW_PRESENT_COL, lit(true)) + + df + } + + /** + * Generate a plan by calculating modified rows. It's computed by joining source and target + * tables, where target table has been filtered by (`__metadata_file_name`, + * `__metadata_row_idx`) pairs collected in first step. + * + * Schema of `modifiedDF`: + * + * targetSchema + ROW_DROPPED_COL + TARGET_ROW_PRESENT_COL + + * SOURCE_ROW_PRESENT_COL + INCR_METRICS_COL + * INCR_METRICS_COL + * + * It consists of several parts: + * + * 1. Unmatched source rows which are inserted + * 2. Unmatched source rows which are deleted + * 3. Target rows which are updated + * 4. 
Target rows which are deleted + */ + private def getModifiedDF(touchedFiles: Map[String, (Roaring64Bitmap, AddFile)]): DataFrame = { + val sourceDF = this.sourceDF + .withColumn(SOURCE_ROW_PRESENT_COL, new Column(incrSourceRowCountExpr)) + + val targetDF = getTouchedTargetDF(touchedFiles) + + val joinedDF = { + val joinType = if (hasNoInserts && + context.spark.conf.get(DeltaSQLConf.MERGE_MATCHED_ONLY_ENABLED)) { + "inner" + } else { + "leftOuter" + } + val matchedTargetDF = targetDF.filter(METADATA_ROW_DEL_COL) + .drop(METADATA_ROW_DEL_COL) + + sourceDF.join(matchedTargetDF, new Column(context.cmd.condition), joinType) + } + + val modifiedRowsSchema = context.deltaTxn.metadata.schema + .add(ROW_DROPPED_FIELD) + .add(TARGET_ROW_PRESENT_FIELD.copy(nullable = true)) + .add(SOURCE_ROW_PRESENT_FIELD.copy(nullable = true)) + .add(INCR_METRICS_FIELD) + + // Here we generate a case when statement to handle all cases: + // CASE + // WHEN [source row has no match in the target] + // CASE WHEN [not matched condition 1] + // [not matched output 1] + // WHEN [not matched condition 2] + // [not matched output 2] + // ELSE + // [default output for an unmatched source row] + // WHEN [source row has a match in the target] + // CASE WHEN [matched condition 1] + // [matched output 1] + // WHEN [matched condition 2] + // [matched output 2] + // ELSE + // [default output for a matched row] + // END + + val notMatchedConditions = context.cmd.notMatchedClauses.map(clauseCondition) + val notMatchedExpr = { + val deletedNotMatchedRow = { + targetOutputCols :+ + Literal.TrueLiteral :+ + Literal.FalseLiteral :+ + Literal(null) :+ + Literal.TrueLiteral + } + if (context.cmd.notMatchedClauses.isEmpty) { + // If there is no `WHEN NOT MATCHED` clause, we should just delete the not-matched row + deletedNotMatchedRow + } else { + val notMatchedOutputs = context.cmd.notMatchedClauses.map(clauseOutput) + modifiedRowsSchema.zipWithIndex.map { + case (_, idx) => + CaseWhen(notMatchedConditions.zip(notMatchedOutputs.map(_(idx))), + deletedNotMatchedRow(idx)) + } + } + } + + val matchedConditions = context.cmd.matchedClauses.map(clauseCondition) + val matchedOutputs = context.cmd.matchedClauses.map(clauseOutput) + val matchedExprs = { + val notMatchedRow = { + targetOutputCols :+ + Literal.FalseLiteral :+ + Literal.TrueLiteral :+ + Literal(null) :+ + Literal.TrueLiteral + } + if (context.cmd.matchedClauses.isEmpty) { + // If there is no matched clause, this is an insert-only merge, so we should delete this row. + notMatchedRow + } else { + modifiedRowsSchema.zipWithIndex.map { + case (_, idx) => + CaseWhen(matchedConditions.zip(matchedOutputs.map(_(idx))), + notMatchedRow(idx)) + } + } + } + + val sourceRowHasNoMatch = col(TARGET_ROW_PRESENT_COL).isNull.expr + + val modifiedCols = modifiedRowsSchema.zipWithIndex.map { case (col, idx) => + val caseWhen = CaseWhen( + Seq(sourceRowHasNoMatch -> notMatchedExpr(idx)), + matchedExprs(idx)) + new Column(Alias(caseWhen, col.name)()) + } + + val modifiedDF = { + + // Make this a UDF to keep catalyst from being so aggressive that it even removes the join!
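+ // Because the UDF is opaque to the optimizer, the always-true filter built from it below
+ // cannot be constant-folded away, which keeps the join and the metric-updating expressions
+ // alive in the physical plan.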
+ val noopRowDroppedCol = udf(new GpuDeltaNoopUDF()).apply(!col(ROW_DROPPED_COL)) + + val modifiedDF = joinedDF.select(modifiedCols: _*) + // This will not filter anything since they always return true, but we need to avoid + // catalyst from optimizing these udf + .filter(noopRowDroppedCol && col(INCR_METRICS_COL)) + .drop(ROW_DROPPED_COL, INCR_METRICS_COL, TARGET_ROW_PRESENT_COL, SOURCE_ROW_PRESENT_COL) + + repartitionIfNeeded(modifiedDF) + } + + modifiedDF + } + + private def getUnmodifiedDF(touchedFiles: Map[String, (Roaring64Bitmap, AddFile)]): DataFrame = { + getTouchedTargetDF(touchedFiles) + .filter(!col(METADATA_ROW_DEL_COL)) + .drop(TARGET_ROW_PRESENT_COL, METADATA_ROW_DEL_COL) + } +} + + +object MergeExecutor { + + /** + * Spark UI will track all normal accumulators along with Spark tasks to show them on Web UI. + * However, the accumulator used by `MergeIntoCommand` can store a very large value since it + * tracks all files that need to be rewritten. We should ask Spark UI to not remember it, + * otherwise, the UI data may consume lots of memory. Hence, we use the prefix `internal.metrics.` + * to make this accumulator become an internal accumulator, so that it will not be tracked by + * Spark UI. + */ + val TOUCHED_FILES_ACCUM_NAME = "internal.metrics.MergeIntoDelta.touchedFiles" + + val ROW_ID_COL = "_row_id_" + val FILE_PATH_COL: String = GpuDeltaParquetFileFormatUtils.FILE_PATH_COL + val SOURCE_ROW_PRESENT_COL: String = "_source_row_present_" + val SOURCE_ROW_PRESENT_FIELD: StructField = StructField(SOURCE_ROW_PRESENT_COL, BooleanType, + nullable = false) + val TARGET_ROW_PRESENT_COL: String = "_target_row_present_" + val TARGET_ROW_PRESENT_FIELD: StructField = StructField(TARGET_ROW_PRESENT_COL, BooleanType, + nullable = false) + val ROW_DROPPED_COL: String = GpuDeltaMergeConstants.ROW_DROPPED_COL + val ROW_DROPPED_FIELD: StructField = StructField(ROW_DROPPED_COL, BooleanType, nullable = false) + val INCR_METRICS_COL: String = "_incr_metrics_" + val INCR_METRICS_FIELD: StructField = StructField(INCR_METRICS_COL, BooleanType, nullable = false) + val INCR_ROW_COUNT_COL: String = "_incr_row_count_" + + // Some Delta versions use Literal(null) which translates to a literal of NullType instead + // of the Literal(null, StringType) which is needed, so using a fixed version here + // rather than the version from Delta Lake. + val CDC_TYPE_NOT_CDC_LITERAL: Literal = Literal(null, StringType) + + def toDeletionVector(bitmap: Roaring64Bitmap): DeletionVectorDescriptor = { + DeletionVectorDescriptor.inlineInLog(RoaringBitmapWrapper(bitmap).serializeToBytes(), + bitmap.getLongCardinality) + } + + /** Count the number of distinct partition values among the AddFiles in the given set. */ + def totalBytesAndDistinctPartitionValues(files: Seq[FileAction]): (Long, Int) = { + val distinctValues = new mutable.HashSet[Map[String, String]]() + var bytes = 0L + val iter = files.collect { case a: AddFile => a }.iterator + while (iter.hasNext) { + val file = iter.next() + distinctValues += file.partitionValues + bytes += file.size + } + // If the only distinct value map is an empty map, then it must be an unpartitioned table. + // Return 0 in that case. 
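+ // For example, for an unpartitioned table every AddFile carries partitionValues == Map.empty,
+ // so the set holds exactly one (empty) map and we report 0 distinct partitions rather than 1.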
+ val numDistinctValues = + if (distinctValues.size == 1 && distinctValues.head.isEmpty) 0 else distinctValues.size + (bytes, numDistinctValues) + } +} \ No newline at end of file diff --git a/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuMergeIntoCommand.scala b/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuMergeIntoCommand.scala new file mode 100644 index 00000000000..71e8a413b00 --- /dev/null +++ b/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuMergeIntoCommand.scala @@ -0,0 +1,1189 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * This file was derived from MergeIntoCommand.scala + * in the Delta Lake project at https://github.com/delta-io/delta. + * + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.databricks.sql.transaction.tahoe.rapids + +import java.util.concurrent.TimeUnit + +import scala.collection.JavaConverters._ +import scala.collection.mutable + +import com.databricks.sql.transaction.tahoe._ +import com.databricks.sql.transaction.tahoe.DeltaOperations.MergePredicate +import com.databricks.sql.transaction.tahoe.actions.{AddCDCFile, AddFile, FileAction} +import com.databricks.sql.transaction.tahoe.commands.DeltaCommand +import com.databricks.sql.transaction.tahoe.schema.ImplicitMetadataOperation +import com.databricks.sql.transaction.tahoe.sources.DeltaSQLConf +import com.databricks.sql.transaction.tahoe.util.{AnalysisHelper, SetAccumulator} +import com.fasterxml.jackson.databind.annotation.JsonDeserialize +import com.nvidia.spark.rapids.{BaseExprMeta, GpuOverrides, RapidsConf} +import com.nvidia.spark.rapids.delta._ + +import org.apache.spark.SparkContext +import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder} +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, BasePredicate, Expression, Literal, NamedExpression, PredicateHelper, UnsafeProjection} +import org.apache.spark.sql.catalyst.expressions.codegen.GeneratePredicate +import org.apache.spark.sql.catalyst.plans.logical.{DeltaMergeIntoClause, DeltaMergeIntoMatchedClause, DeltaMergeIntoMatchedDeleteClause, DeltaMergeIntoMatchedUpdateClause, DeltaMergeIntoNotMatchedBySourceClause, DeltaMergeIntoNotMatchedClause, LogicalPlan, Project} +import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.execution.SQLExecution +import org.apache.spark.sql.execution.command.LeafRunnableCommand +import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} +import org.apache.spark.sql.functions._ +import 
org.apache.spark.sql.types.{DataTypes, LongType, StringType, StructType} +case class GpuMergeDataSizes( + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + rows: Option[Long] = None, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + files: Option[Long] = None, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + bytes: Option[Long] = None, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + partitions: Option[Long] = None) + +/** + * Represents the state of a single merge clause: + * - merge clause's (optional) predicate + * - action type (insert, update, delete) + * - action's expressions + */ +case class GpuMergeClauseStats( + condition: Option[String], + actionType: String, + actionExpr: Seq[String]) + +object GpuMergeClauseStats { + def apply(mergeClause: DeltaMergeIntoClause): GpuMergeClauseStats = { + GpuMergeClauseStats( + condition = mergeClause.condition.map(_.sql), + mergeClause.clauseType.toLowerCase(), + actionExpr = mergeClause.actions.map(_.sql)) + } +} + +/** State for a GPU merge operation */ +case class GpuMergeStats( + // Merge condition expression + conditionExpr: String, + + // Expressions used in old MERGE stats, now always Null + updateConditionExpr: String, + updateExprs: Seq[String], + insertConditionExpr: String, + insertExprs: Seq[String], + deleteConditionExpr: String, + + // Newer expressions used in MERGE with any number of MATCHED/NOT MATCHED + matchedStats: Seq[GpuMergeClauseStats], + notMatchedStats: Seq[GpuMergeClauseStats], + + // Data sizes of source and target at different stages of processing + source: GpuMergeDataSizes, + targetBeforeSkipping: GpuMergeDataSizes, + targetAfterSkipping: GpuMergeDataSizes, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + sourceRowsInSecondScan: Option[Long], + + // Data change sizes + targetFilesRemoved: Long, + targetFilesAdded: Long, + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + targetChangeFilesAdded: Option[Long], + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + targetChangeFileBytes: Option[Long], + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + targetBytesRemoved: Option[Long], + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + targetBytesAdded: Option[Long], + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + targetPartitionsRemovedFrom: Option[Long], + @JsonDeserialize(contentAs = classOf[java.lang.Long]) + targetPartitionsAddedTo: Option[Long], + targetRowsCopied: Long, + targetRowsUpdated: Long, + targetRowsInserted: Long, + targetRowsDeleted: Long +) + +object GpuMergeStats { + + def fromMergeSQLMetrics( + metrics: Map[String, SQLMetric], + condition: Expression, + matchedClauses: Seq[DeltaMergeIntoMatchedClause], + notMatchedClauses: Seq[DeltaMergeIntoNotMatchedClause], + isPartitioned: Boolean): GpuMergeStats = { + + def metricValueIfPartitioned(metricName: String): Option[Long] = { + if (isPartitioned) Some(metrics(metricName).value) else None + } + + GpuMergeStats( + // Merge condition expression + conditionExpr = condition.sql, + + // Newer expressions used in MERGE with any number of MATCHED/NOT MATCHED + matchedStats = matchedClauses.map(GpuMergeClauseStats(_)), + notMatchedStats = notMatchedClauses.map(GpuMergeClauseStats(_)), + + // Data sizes of source and target at different stages of processing + source = GpuMergeDataSizes(rows = Some(metrics("numSourceRows").value)), + targetBeforeSkipping = + GpuMergeDataSizes( + files = Some(metrics("numTargetFilesBeforeSkipping").value), + bytes = 
Some(metrics("numTargetBytesBeforeSkipping").value)), + targetAfterSkipping = + GpuMergeDataSizes( + files = Some(metrics("numTargetFilesAfterSkipping").value), + bytes = Some(metrics("numTargetBytesAfterSkipping").value), + partitions = metricValueIfPartitioned("numTargetPartitionsAfterSkipping")), + sourceRowsInSecondScan = + metrics.get("numSourceRowsInSecondScan").map(_.value).filter(_ >= 0), + + // Data change sizes + targetFilesAdded = metrics("numTargetFilesAdded").value, + targetChangeFilesAdded = metrics.get("numTargetChangeFilesAdded").map(_.value), + targetChangeFileBytes = metrics.get("numTargetChangeFileBytes").map(_.value), + targetFilesRemoved = metrics("numTargetFilesRemoved").value, + targetBytesAdded = Some(metrics("numTargetBytesAdded").value), + targetBytesRemoved = Some(metrics("numTargetBytesRemoved").value), + targetPartitionsRemovedFrom = metricValueIfPartitioned("numTargetPartitionsRemovedFrom"), + targetPartitionsAddedTo = metricValueIfPartitioned("numTargetPartitionsAddedTo"), + targetRowsCopied = metrics("numTargetRowsCopied").value, + targetRowsUpdated = metrics("numTargetRowsUpdated").value, + targetRowsInserted = metrics("numTargetRowsInserted").value, + targetRowsDeleted = metrics("numTargetRowsDeleted").value, + + // Deprecated fields + updateConditionExpr = null, + updateExprs = null, + insertConditionExpr = null, + insertExprs = null, + deleteConditionExpr = null) + } +} + +/** + * GPU version of Delta Lake's MergeIntoCommand. + * + * Performs a merge of a source query/table into a Delta table. + * + * Issues an error message when the ON search_condition of the MERGE statement can match + * a single row from the target table with multiple rows of the source table-reference. + * + * Algorithm: + * + * Phase 1: Find the input files in target that are touched by the rows that satisfy + * the condition and verify that no two source rows match with the same target row. + * This is implemented as an inner-join using the given condition. See [[findTouchedFiles]] + * for more details. + * + * Phase 2: Read the touched files again and write new files with updated and/or inserted rows. + * + * Phase 3: Use the Delta protocol to atomically remove the touched files and add the new files. + * + * @param source Source data to merge from + * @param target Target table to merge into + * @param gpuDeltaLog Delta log to use + * @param condition Condition for a source row to match with a target row + * @param matchedClauses All info related to matched clauses. + * @param notMatchedClauses All info related to not matched clause. + * @param migratedSchema The final schema of the target - may be changed by schema evolution. 
+ */ +case class GpuMergeIntoCommand( + @transient source: LogicalPlan, + @transient target: LogicalPlan, + @transient gpuDeltaLog: GpuDeltaLog, + condition: Expression, + matchedClauses: Seq[DeltaMergeIntoMatchedClause], + notMatchedClauses: Seq[DeltaMergeIntoNotMatchedClause], + notMatchedBySourceClauses: Seq[DeltaMergeIntoNotMatchedBySourceClause], + migratedSchema: Option[StructType])( + @transient val rapidsConf: RapidsConf) + extends LeafRunnableCommand + with DeltaCommand with PredicateHelper with AnalysisHelper with ImplicitMetadataOperation { + + import GpuMergeIntoCommand._ + + import SQLMetrics._ + import com.databricks.sql.transaction.tahoe.commands.cdc.CDCReader._ + + override val otherCopyArgs: Seq[AnyRef] = Seq(rapidsConf) + + override val canMergeSchema: Boolean = conf.getConf(DeltaSQLConf.DELTA_SCHEMA_AUTO_MIGRATE) + override val canOverwriteSchema: Boolean = false + + override val output: Seq[Attribute] = Seq( + AttributeReference("num_affected_rows", LongType)(), + AttributeReference("num_updated_rows", LongType)(), + AttributeReference("num_deleted_rows", LongType)(), + AttributeReference("num_inserted_rows", LongType)()) + + @transient private lazy val sc: SparkContext = SparkContext.getOrCreate() + @transient private lazy val targetDeltaLog: DeltaLog = gpuDeltaLog.deltaLog + /** + * Map to get target output attributes by name. + * The case sensitivity of the map is set accordingly to Spark configuration. + */ + @transient private lazy val targetOutputAttributesMap: Map[String, Attribute] = { + val attrMap: Map[String, Attribute] = target + .outputSet.view + .map(attr => attr.name -> attr).toMap + if (conf.caseSensitiveAnalysis) { + attrMap + } else { + CaseInsensitiveMap(attrMap) + } + } + + /** Whether this merge statement has only a single insert (NOT MATCHED) clause. */ + private def isSingleInsertOnly: Boolean = matchedClauses.isEmpty && notMatchedClauses.length == 1 + /** Whether this merge statement has only MATCHED clauses. */ + private def isMatchedOnly: Boolean = notMatchedClauses.isEmpty && matchedClauses.nonEmpty + + // We over-count numTargetRowsDeleted when there are multiple matches; + // this is the amount of the overcount, so we can subtract it to get a correct final metric. 
+ private var multipleMatchDeleteOnlyOvercount: Option[Long] = None + + override lazy val metrics = Map[String, SQLMetric]( + "numSourceRows" -> createMetric(sc, "number of source rows"), + "numSourceRowsInSecondScan" -> + createMetric(sc, "number of source rows (during repeated scan)"), + "numTargetRowsCopied" -> createMetric(sc, "number of target rows rewritten unmodified"), + "numTargetRowsInserted" -> createMetric(sc, "number of inserted rows"), + "numTargetRowsUpdated" -> createMetric(sc, "number of updated rows"), + "numTargetRowsDeleted" -> createMetric(sc, "number of deleted rows"), + "numTargetFilesBeforeSkipping" -> createMetric(sc, "number of target files before skipping"), + "numTargetFilesAfterSkipping" -> createMetric(sc, "number of target files after skipping"), + "numTargetFilesRemoved" -> createMetric(sc, "number of files removed to target"), + "numTargetFilesAdded" -> createMetric(sc, "number of files added to target"), + "numTargetChangeFilesAdded" -> + createMetric(sc, "number of change data capture files generated"), + "numTargetChangeFileBytes" -> + createMetric(sc, "total size of change data capture files generated"), + "numTargetBytesBeforeSkipping" -> createMetric(sc, "number of target bytes before skipping"), + "numTargetBytesAfterSkipping" -> createMetric(sc, "number of target bytes after skipping"), + "numTargetBytesRemoved" -> createMetric(sc, "number of target bytes removed"), + "numTargetBytesAdded" -> createMetric(sc, "number of target bytes added"), + "numTargetPartitionsAfterSkipping" -> + createMetric(sc, "number of target partitions after skipping"), + "numTargetPartitionsRemovedFrom" -> + createMetric(sc, "number of target partitions from which files were removed"), + "numTargetPartitionsAddedTo" -> + createMetric(sc, "number of target partitions to which files were added"), + "executionTimeMs" -> + createMetric(sc, "time taken to execute the entire operation"), + "scanTimeMs" -> + createMetric(sc, "time taken to scan the files for matches"), + "rewriteTimeMs" -> + createMetric(sc, "time taken to rewrite the matched files")) + + override def run(spark: SparkSession): Seq[Row] = { + recordDeltaOperation(targetDeltaLog, "delta.dml.merge") { + val startTime = System.nanoTime() + gpuDeltaLog.withNewTransaction { deltaTxn => + if (target.schema.size != deltaTxn.metadata.schema.size) { + throw DeltaErrors.schemaChangedSinceAnalysis( + atAnalysis = target.schema, latestSchema = deltaTxn.metadata.schema) + } + + if (canMergeSchema) { + updateMetadata( + spark, deltaTxn, migratedSchema.getOrElse(target.schema), + deltaTxn.metadata.partitionColumns, deltaTxn.metadata.configuration, + isOverwriteMode = false, rearrangeOnly = false) + } + + val deltaActions = { + if (isSingleInsertOnly && spark.conf.get(DeltaSQLConf.MERGE_INSERT_ONLY_ENABLED)) { + writeInsertsOnlyWhenNoMatchedClauses(spark, deltaTxn) + } else { + val filesToRewrite = findTouchedFiles(spark, deltaTxn) + val newWrittenFiles = withStatusCode("DELTA", "Writing merged data") { + writeAllChanges(spark, deltaTxn, filesToRewrite) + } + filesToRewrite.map(_.remove) ++ newWrittenFiles + } + } + + // Metrics should be recorded before commit (where they are written to delta logs). + metrics("executionTimeMs").set((System.nanoTime() - startTime) / 1000 / 1000) + deltaTxn.registerSQLMetrics(spark, metrics) + + // This is a best-effort sanity check. 
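+ // A mismatch between the two source scans usually means the source is non-deterministic (for
+ // example it uses rand() or reads data that changed mid-query), in which case the merge result
+ // cannot be trusted.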
+ if (metrics("numSourceRowsInSecondScan").value >= 0 && + metrics("numSourceRows").value != metrics("numSourceRowsInSecondScan").value) { + log.warn(s"Merge source has ${metrics("numSourceRows").value} rows in initial scan but " + + s"${metrics("numSourceRowsInSecondScan").value} rows in second scan") + if (conf.getConf(DeltaSQLConf.MERGE_FAIL_IF_SOURCE_CHANGED)) { + throw DeltaErrors.sourceNotDeterministicInMergeException(spark) + } + } + + deltaTxn.commit( + deltaActions, + DeltaOperations.Merge( + Option(condition), + matchedClauses.map(DeltaOperations.MergePredicate(_)), + notMatchedClauses.map(DeltaOperations.MergePredicate(_)), + // We do not support notMatchedBySourcePredicates yet and fall back to CPU + // See https://github.com/NVIDIA/spark-rapids/issues/8415 + notMatchedBySourcePredicates = Seq.empty[MergePredicate] + )) + + // Record metrics + val stats = GpuMergeStats.fromMergeSQLMetrics( + metrics, condition, matchedClauses, notMatchedClauses, + deltaTxn.metadata.partitionColumns.nonEmpty) + recordDeltaEvent(targetDeltaLog, "delta.dml.merge.stats", data = stats) + + } + spark.sharedState.cacheManager.recacheByPlan(spark, target) + } + // This is needed to make the SQL metrics visible in the Spark UI. Also this needs + // to be outside the recordMergeOperation because this method will update some metric. + val executionId = spark.sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY) + SQLMetrics.postDriverMetricUpdates(spark.sparkContext, executionId, metrics.values.toSeq) + Seq(Row(metrics("numTargetRowsUpdated").value + metrics("numTargetRowsDeleted").value + + metrics("numTargetRowsInserted").value, metrics("numTargetRowsUpdated").value, + metrics("numTargetRowsDeleted").value, metrics("numTargetRowsInserted").value)) + } + + /** + * Find the target table files that contain the rows that satisfy the merge condition. This is + * implemented as an inner-join between the source query/table and the target table using + * the merge condition. 
+ */ + private def findTouchedFiles( + spark: SparkSession, + deltaTxn: OptimisticTransaction + ): Seq[AddFile] = recordMergeOperation(sqlMetricName = "scanTimeMs") { + + // Accumulator to collect all the distinct touched files + val touchedFilesAccum = new SetAccumulator[String]() + spark.sparkContext.register(touchedFilesAccum, TOUCHED_FILES_ACCUM_NAME) + + // UDFs to records touched files names and add them to the accumulator + val recordTouchedFileName = udf(new GpuDeltaRecordTouchedFileNameUDF(touchedFilesAccum)) + .asNondeterministic() + + // Skip data based on the merge condition + val targetOnlyPredicates = + splitConjunctivePredicates(condition).filter(_.references.subsetOf(target.outputSet)) + val dataSkippedFiles = deltaTxn.filterFiles(targetOnlyPredicates) + + // UDF to increment metrics + val incrSourceRowCountExpr = makeMetricUpdateUDF("numSourceRows") + val sourceDF = Dataset.ofRows(spark, source) + .filter(new Column(incrSourceRowCountExpr)) + + // Apply inner join to between source and target using the merge condition to find matches + // In addition, we attach two columns + // - a monotonically increasing row id for target rows to later identify whether the same + // target row is modified by multiple user or not + // - the target file name the row is from to later identify the files touched by matched rows + val targetDF = Dataset.ofRows(spark, buildTargetPlanWithFiles(deltaTxn, dataSkippedFiles)) + .withColumn(ROW_ID_COL, monotonically_increasing_id()) + .withColumn(FILE_NAME_COL, input_file_name()) + val joinToFindTouchedFiles = sourceDF.join(targetDF, new Column(condition), "inner") + + // Process the matches from the inner join to record touched files and find multiple matches + val collectTouchedFiles = joinToFindTouchedFiles + .select(col(ROW_ID_COL), recordTouchedFileName(col(FILE_NAME_COL)).as("one")) + + // Calculate frequency of matches per source row + val matchedRowCounts = collectTouchedFiles.groupBy(ROW_ID_COL).agg(sum("one").as("count")) + + // Get multiple matches and simultaneously collect (using touchedFilesAccum) the file names + // multipleMatchCount = # of target rows with more than 1 matching source row (duplicate match) + // multipleMatchSum = total # of duplicate matched rows + import spark.implicits._ + val (multipleMatchCount, multipleMatchSum) = matchedRowCounts + .filter("count > 1") + .select(coalesce(count("*"), lit(0)), coalesce(sum("count"), lit(0))) + .as[(Long, Long)] + .collect() + .head + + val hasMultipleMatches = multipleMatchCount > 0 + + // Throw error if multiple matches are ambiguous or cannot be computed correctly. + val canBeComputedUnambiguously = { + // Multiple matches are not ambiguous when there is only one unconditional delete as + // all the matched row pairs in the 2nd join in `writeAllChanges` will get deleted. + val isUnconditionalDelete = matchedClauses.headOption match { + case Some(DeltaMergeIntoMatchedDeleteClause(None)) => true + case _ => false + } + matchedClauses.size == 1 && isUnconditionalDelete + } + + if (hasMultipleMatches && !canBeComputedUnambiguously) { + throw DeltaErrors.multipleSourceRowMatchingTargetRowInMergeException(spark) + } + + if (hasMultipleMatches) { + // This is only allowed for delete-only queries. + // This query will count the duplicates for numTargetRowsDeleted in Job 2, + // because we count matches after the join and not just the target rows. + // We have to compensate for this by subtracting the duplicates later, + // so we need to record them here. 
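+ // Hypothetical example: if two target rows matched 3 and 2 source rows respectively, then
+ // multipleMatchCount = 2, multipleMatchSum = 5, and the overcount recorded below is 5 - 2 = 3.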
+ val duplicateCount = multipleMatchSum - multipleMatchCount + multipleMatchDeleteOnlyOvercount = Some(duplicateCount) + } + + // Get the AddFiles using the touched file names. + val touchedFileNames = touchedFilesAccum.value.iterator().asScala.toSeq + logTrace(s"findTouchedFiles: matched files:\n\t${touchedFileNames.mkString("\n\t")}") + + val nameToAddFileMap = generateCandidateFileMap(targetDeltaLog.dataPath, dataSkippedFiles) + val touchedAddFiles = touchedFileNames.map(f => + getTouchedFile(targetDeltaLog.dataPath, f, nameToAddFileMap)) + + // When the target table is empty, and the optimizer optimized away the join entirely + // numSourceRows will be incorrectly 0. We need to scan the source table once to get the correct + // metric here. + if (metrics("numSourceRows").value == 0 && + (dataSkippedFiles.isEmpty || targetDF.take(1).isEmpty)) { + val numSourceRows = sourceDF.count() + metrics("numSourceRows").set(numSourceRows) + } + + // Update metrics + metrics("numTargetFilesBeforeSkipping") += deltaTxn.snapshot.numOfFiles + metrics("numTargetBytesBeforeSkipping") += deltaTxn.snapshot.sizeInBytes + val (afterSkippingBytes, afterSkippingPartitions) = + totalBytesAndDistinctPartitionValues(dataSkippedFiles) + metrics("numTargetFilesAfterSkipping") += dataSkippedFiles.size + metrics("numTargetBytesAfterSkipping") += afterSkippingBytes + metrics("numTargetPartitionsAfterSkipping") += afterSkippingPartitions + val (removedBytes, removedPartitions) = totalBytesAndDistinctPartitionValues(touchedAddFiles) + metrics("numTargetFilesRemoved") += touchedAddFiles.size + metrics("numTargetBytesRemoved") += removedBytes + metrics("numTargetPartitionsRemovedFrom") += removedPartitions + touchedAddFiles + } + + /** + * This is an optimization of the case when there is no update clause for the merge. + * We perform an left anti join on the source data to find the rows to be inserted. + * + * This will currently only optimize for the case when there is a _single_ notMatchedClause. 
+ */ + private def writeInsertsOnlyWhenNoMatchedClauses( + spark: SparkSession, + deltaTxn: OptimisticTransaction + ): Seq[FileAction] = recordMergeOperation(sqlMetricName = "rewriteTimeMs") { + + // UDFs to update metrics + val incrSourceRowCountExpr = makeMetricUpdateUDF("numSourceRows") + val incrInsertedCountExpr = makeMetricUpdateUDF("numTargetRowsInserted") + + val outputColNames = getTargetOutputCols(deltaTxn).map(_.name) + // we use head here since we know there is only a single notMatchedClause + val outputExprs = notMatchedClauses.head.resolvedActions.map(_.expr) + val outputCols = outputExprs.zip(outputColNames).map { case (expr, name) => + new Column(Alias(expr, name)()) + } + + // source DataFrame + val sourceDF = Dataset.ofRows(spark, source) + .filter(new Column(incrSourceRowCountExpr)) + .filter(new Column(notMatchedClauses.head.condition.getOrElse(Literal.TrueLiteral))) + + // Skip data based on the merge condition + val conjunctivePredicates = splitConjunctivePredicates(condition) + val targetOnlyPredicates = + conjunctivePredicates.filter(_.references.subsetOf(target.outputSet)) + val dataSkippedFiles = deltaTxn.filterFiles(targetOnlyPredicates) + + // target DataFrame + val targetDF = Dataset.ofRows( + spark, buildTargetPlanWithFiles(deltaTxn, dataSkippedFiles)) + + val insertDf = sourceDF.join(targetDF, new Column(condition), "leftanti") + .select(outputCols: _*) + .filter(new Column(incrInsertedCountExpr)) + + val newFiles = deltaTxn + .writeFiles(repartitionIfNeeded(spark, insertDf, deltaTxn.metadata.partitionColumns)) + + // Update metrics + metrics("numTargetFilesBeforeSkipping") += deltaTxn.snapshot.numOfFiles + metrics("numTargetBytesBeforeSkipping") += deltaTxn.snapshot.sizeInBytes + val (afterSkippingBytes, afterSkippingPartitions) = + totalBytesAndDistinctPartitionValues(dataSkippedFiles) + metrics("numTargetFilesAfterSkipping") += dataSkippedFiles.size + metrics("numTargetBytesAfterSkipping") += afterSkippingBytes + metrics("numTargetPartitionsAfterSkipping") += afterSkippingPartitions + metrics("numTargetFilesRemoved") += 0 + metrics("numTargetBytesRemoved") += 0 + metrics("numTargetPartitionsRemovedFrom") += 0 + val (addedBytes, addedPartitions) = totalBytesAndDistinctPartitionValues(newFiles) + metrics("numTargetFilesAdded") += newFiles.count(_.isInstanceOf[AddFile]) + metrics("numTargetBytesAdded") += addedBytes + metrics("numTargetPartitionsAddedTo") += addedPartitions + newFiles + } + + /** + * Write new files by reading the touched files and updating/inserting data using the source + * query/table. This is implemented using a full|right-outer-join using the merge condition. + * + * Note that unlike the insert-only code paths with just one control column INCR_ROW_COUNT_COL, + * this method has two additional control columns ROW_DROPPED_COL for dropping deleted rows and + * CDC_TYPE_COL_NAME used for handling CDC when enabled. 
+ */ + private def writeAllChanges( + spark: SparkSession, + deltaTxn: OptimisticTransaction, + filesToRewrite: Seq[AddFile] + ): Seq[FileAction] = recordMergeOperation(sqlMetricName = "rewriteTimeMs") { + import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} + + val cdcEnabled = DeltaConfigs.CHANGE_DATA_FEED.fromMetaData(deltaTxn.metadata) + + var targetOutputCols = getTargetOutputCols(deltaTxn) + var outputRowSchema = deltaTxn.metadata.schema + + // When we have duplicate matches (only allowed when the whenMatchedCondition is a delete with + // no match condition) we will incorrectly generate duplicate CDC rows. + // Duplicate matches can be due to: + // - Duplicate rows in the source w.r.t. the merge condition + // - A target-only or source-only merge condition, which essentially turns our join into a cross + // join with the target/source satisfiying the merge condition. + // These duplicate matches are dropped from the main data output since this is a delete + // operation, but the duplicate CDC rows are not removed by default. + // See https://github.com/delta-io/delta/issues/1274 + + // We address this specific scenario by adding row ids to the target before performing our join. + // There should only be one CDC delete row per target row so we can use these row ids to dedupe + // the duplicate CDC delete rows. + + // We also need to address the scenario when there are duplicate matches with delete and we + // insert duplicate rows. Here we need to additionally add row ids to the source before the + // join to avoid dropping these valid duplicate inserted rows and their corresponding cdc rows. + + // When there is an insert clause, we set SOURCE_ROW_ID_COL=null for all delete rows because we + // need to drop the duplicate matches. + val isDeleteWithDuplicateMatchesAndCdc = multipleMatchDeleteOnlyOvercount.nonEmpty && cdcEnabled + + // Generate a new logical plan that has same output attributes exprIds as the target plan. + // This allows us to apply the existing resolved update/insert expressions. + val newTarget = buildTargetPlanWithFiles(deltaTxn, filesToRewrite) + val joinType = if (isMatchedOnly && + spark.conf.get(DeltaSQLConf.MERGE_MATCHED_ONLY_ENABLED)) { + "rightOuter" + } else { + "fullOuter" + } + + logDebug(s"""writeAllChanges using $joinType join: + | source.output: ${source.outputSet} + | target.output: ${target.outputSet} + | condition: $condition + | newTarget.output: ${newTarget.outputSet} + """.stripMargin) + + // UDFs to update metrics + // Make UDFs that appear in the custom join processor node deterministic, as they always + // return true and update a metric. Catalyst precludes non-deterministic UDFs that are not + // allowed outside a very specific set of Catalyst nodes (Project, Filter, Window, Aggregate). + val incrSourceRowCountExpr = makeMetricUpdateUDF("numSourceRowsInSecondScan") + val incrUpdatedCountExpr = makeMetricUpdateUDF("numTargetRowsUpdated", deterministic = true) + val incrInsertedCountExpr = makeMetricUpdateUDF("numTargetRowsInserted", deterministic = true) + val incrNoopCountExpr = makeMetricUpdateUDF("numTargetRowsCopied", deterministic = true) + val incrDeletedCountExpr = makeMetricUpdateUDF("numTargetRowsDeleted", deterministic = true) + + // Apply an outer join to find both, matches and non-matches. We are adding two boolean fields + // with value `true`, one to each side of the join. 
Whether this field is null or not after + // the outer join, will allow us to identify whether the resultant joined row was a + // matched inner result or an unmatched result with null on one side. + // We add row IDs to the targetDF if we have a delete-when-matched clause with duplicate + // matches and CDC is enabled, and additionally add row IDs to the source if we also have an + // insert clause. See above at isDeleteWithDuplicateMatchesAndCdc definition for more details. + var sourceDF = Dataset.ofRows(spark, source) + .withColumn(SOURCE_ROW_PRESENT_COL, new Column(incrSourceRowCountExpr)) + var targetDF = Dataset.ofRows(spark, newTarget) + .withColumn(TARGET_ROW_PRESENT_COL, lit(true)) + if (isDeleteWithDuplicateMatchesAndCdc) { + targetDF = targetDF.withColumn(TARGET_ROW_ID_COL, monotonically_increasing_id()) + if (notMatchedClauses.nonEmpty) { // insert clause + sourceDF = sourceDF.withColumn(SOURCE_ROW_ID_COL, monotonically_increasing_id()) + } + } + val joinedDF = sourceDF.join(targetDF, new Column(condition), joinType) + val joinedPlan = joinedDF.queryExecution.analyzed + + def resolveOnJoinedPlan(exprs: Seq[Expression]): Seq[Expression] = { + tryResolveReferencesForExpressions(spark, exprs, joinedPlan) + } + + // ==== Generate the expressions to process full-outer join output and generate target rows ==== + // If there are N columns in the target table, there will be N + 3 columns after processing + // - N columns for target table + // - ROW_DROPPED_COL to define whether the generated row should dropped or written + // - INCR_ROW_COUNT_COL containing a UDF to update the output row row counter + // - CDC_TYPE_COLUMN_NAME containing the type of change being performed in a particular row + + // To generate these N + 3 columns, we will generate N + 3 expressions and apply them to the + // rows in the joinedDF. The CDC column will be either used for CDC generation or dropped before + // performing the final write, and the other two will always be dropped after executing the + // metrics UDF and filtering on ROW_DROPPED_COL. + + // We produce rows for both the main table data (with CDC_TYPE_COLUMN_NAME = CDC_TYPE_NOT_CDC), + // and rows for the CDC data which will be output to CDCReader.CDC_LOCATION. + // See [[CDCReader]] for general details on how partitioning on the CDC type column works. + + // In the following two functions `matchedClauseOutput` and `notMatchedClauseOutput`, we + // produce a Seq[Expression] for each intended output row. + // Depending on the clause and whether CDC is enabled, we output between 0 and 3 rows, as a + // Seq[Seq[Expression]] + + // There is one corner case outlined above at isDeleteWithDuplicateMatchesAndCdc definition. + // When we have a delete-ONLY merge with duplicate matches we have N + 4 columns: + // N target cols, TARGET_ROW_ID_COL, ROW_DROPPED_COL, INCR_ROW_COUNT_COL, CDC_TYPE_COLUMN_NAME + // When we have a delete-when-matched merge with duplicate matches + an insert clause, we have + // N + 5 columns: + // N target cols, TARGET_ROW_ID_COL, SOURCE_ROW_ID_COL, ROW_DROPPED_COL, INCR_ROW_COUNT_COL, + // CDC_TYPE_COLUMN_NAME + // These ROW_ID_COL will always be dropped before the final write. 
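+ // Illustrative shape: for a hypothetical two-column target (id, value), each joined row is
+ // processed into the N + 3 = 5 expressions
+ // (id, value, ROW_DROPPED_COL, INCR_ROW_COUNT_COL, CDC_TYPE_COLUMN_NAME).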
+ + if (isDeleteWithDuplicateMatchesAndCdc) { + targetOutputCols = targetOutputCols :+ UnresolvedAttribute(TARGET_ROW_ID_COL) + outputRowSchema = outputRowSchema.add(TARGET_ROW_ID_COL, DataTypes.LongType) + if (notMatchedClauses.nonEmpty) { // there is an insert clause, make SRC_ROW_ID_COL=null + targetOutputCols = targetOutputCols :+ Alias(Literal(null), SOURCE_ROW_ID_COL)() + outputRowSchema = outputRowSchema.add(SOURCE_ROW_ID_COL, DataTypes.LongType) + } + } + + if (cdcEnabled) { + outputRowSchema = outputRowSchema + .add(ROW_DROPPED_COL, DataTypes.BooleanType) + .add(INCR_ROW_COUNT_COL, DataTypes.BooleanType) + .add(CDC_TYPE_COLUMN_NAME, DataTypes.StringType) + } + + def matchedClauseOutput(clause: DeltaMergeIntoMatchedClause): Seq[Seq[Expression]] = { + val exprs = clause match { + case u: DeltaMergeIntoMatchedUpdateClause => + // Generate update expressions and set ROW_DELETED_COL = false and + // CDC_TYPE_COLUMN_NAME = CDC_TYPE_NOT_CDC + val mainDataOutput = u.resolvedActions.map(_.expr) :+ FalseLiteral :+ + incrUpdatedCountExpr :+ CDC_TYPE_NOT_CDC_LITERAL + if (cdcEnabled) { + // For update preimage, we have do a no-op copy with ROW_DELETED_COL = false and + // CDC_TYPE_COLUMN_NAME = CDC_TYPE_UPDATE_PREIMAGE and INCR_ROW_COUNT_COL as a no-op + // (because the metric will be incremented in `mainDataOutput`) + val preImageOutput = targetOutputCols :+ FalseLiteral :+ TrueLiteral :+ + Literal(CDC_TYPE_UPDATE_PREIMAGE) + // For update postimage, we have the same expressions as for mainDataOutput but with + // INCR_ROW_COUNT_COL as a no-op (because the metric will be incremented in + // `mainDataOutput`), and CDC_TYPE_COLUMN_NAME = CDC_TYPE_UPDATE_POSTIMAGE + val postImageOutput = mainDataOutput.dropRight(2) :+ TrueLiteral :+ + Literal(CDC_TYPE_UPDATE_POSTIMAGE) + Seq(mainDataOutput, preImageOutput, postImageOutput) + } else { + Seq(mainDataOutput) + } + case _: DeltaMergeIntoMatchedDeleteClause => + // Generate expressions to set the ROW_DELETED_COL = true and CDC_TYPE_COLUMN_NAME = + // CDC_TYPE_NOT_CDC + val mainDataOutput = targetOutputCols :+ TrueLiteral :+ incrDeletedCountExpr :+ + CDC_TYPE_NOT_CDC_LITERAL + if (cdcEnabled) { + // For delete we do a no-op copy with ROW_DELETED_COL = false, INCR_ROW_COUNT_COL as a + // no-op (because the metric will be incremented in `mainDataOutput`) and + // CDC_TYPE_COLUMN_NAME = CDC_TYPE_DELETE + val deleteCdcOutput = targetOutputCols :+ FalseLiteral :+ TrueLiteral :+ + Literal(CDC_TYPE_DELETE) + Seq(mainDataOutput, deleteCdcOutput) + } else { + Seq(mainDataOutput) + } + } + exprs.map(resolveOnJoinedPlan) + } + + def notMatchedClauseOutput(clause: DeltaMergeIntoNotMatchedClause): Seq[Seq[Expression]] = { + // Generate insert expressions and set ROW_DELETED_COL = false and + // CDC_TYPE_COLUMN_NAME = CDC_TYPE_NOT_CDC + val insertExprs = clause.resolvedActions.map(_.expr) + val mainDataOutput = resolveOnJoinedPlan( + if (isDeleteWithDuplicateMatchesAndCdc) { + // Must be delete-when-matched merge with duplicate matches + insert clause + // Therefore we must keep the target row id and source row id. Since this is a not-matched + // clause we know the target row-id will be null. See above at + // isDeleteWithDuplicateMatchesAndCdc definition for more details. 
+ insertExprs :+ + Alias(Literal(null), TARGET_ROW_ID_COL)() :+ UnresolvedAttribute(SOURCE_ROW_ID_COL) :+ + FalseLiteral :+ incrInsertedCountExpr :+ CDC_TYPE_NOT_CDC_LITERAL + } else { + insertExprs :+ FalseLiteral :+ incrInsertedCountExpr :+ CDC_TYPE_NOT_CDC_LITERAL + } + ) + if (cdcEnabled) { + // For insert we have the same expressions as for mainDataOutput, but with + // INCR_ROW_COUNT_COL as a no-op (because the metric will be incremented in + // `mainDataOutput`), and CDC_TYPE_COLUMN_NAME = CDC_TYPE_INSERT + val insertCdcOutput = mainDataOutput.dropRight(2) :+ TrueLiteral :+ Literal(CDC_TYPE_INSERT) + Seq(mainDataOutput, insertCdcOutput) + } else { + Seq(mainDataOutput) + } + } + + def clauseCondition(clause: DeltaMergeIntoClause): Expression = { + // if condition is None, then expression always evaluates to true + val condExpr = clause.condition.getOrElse(TrueLiteral) + resolveOnJoinedPlan(Seq(condExpr)).head + } + + val targetRowHasNoMatch = resolveOnJoinedPlan(Seq(col(SOURCE_ROW_PRESENT_COL).isNull.expr)).head + val sourceRowHasNoMatch = resolveOnJoinedPlan(Seq(col(TARGET_ROW_PRESENT_COL).isNull.expr)).head + val matchedConditions = matchedClauses.map(clauseCondition) + val matchedOutputs = matchedClauses.map(matchedClauseOutput) + val notMatchedConditions = notMatchedClauses.map(clauseCondition) + val notMatchedOutputs = notMatchedClauses.map(notMatchedClauseOutput) + // TODO support notMatchedBySourceClauses which is new in DBR 12.2 + // https://github.com/NVIDIA/spark-rapids/issues/8415 + val notMatchedBySourceConditions = Seq.empty + val notMatchedBySourceOutputs = Seq.empty + val noopCopyOutput = + resolveOnJoinedPlan(targetOutputCols :+ FalseLiteral :+ incrNoopCountExpr :+ + CDC_TYPE_NOT_CDC_LITERAL) + val deleteRowOutput = + resolveOnJoinedPlan(targetOutputCols :+ TrueLiteral :+ TrueLiteral :+ + CDC_TYPE_NOT_CDC_LITERAL) + var outputDF = addMergeJoinProcessor(spark, joinedPlan, outputRowSchema, + targetRowHasNoMatch = targetRowHasNoMatch, + sourceRowHasNoMatch = sourceRowHasNoMatch, + matchedConditions = matchedConditions, + matchedOutputs = matchedOutputs, + notMatchedConditions = notMatchedConditions, + notMatchedOutputs = notMatchedOutputs, + notMatchedBySourceConditions = notMatchedBySourceConditions, + notMatchedBySourceOutputs = notMatchedBySourceOutputs, + noopCopyOutput = noopCopyOutput, + deleteRowOutput = deleteRowOutput) + + if (isDeleteWithDuplicateMatchesAndCdc) { + // When we have a delete when matched clause with duplicate matches we have to remove + // duplicate CDC rows. This scenario is further explained at + // isDeleteWithDuplicateMatchesAndCdc definition. + + // To remove duplicate CDC rows generated by the duplicate matches we dedupe by + // TARGET_ROW_ID_COL since there should only be one CDC delete row per target row. + // When there is an insert clause in addition to the delete clause we additionally dedupe by + // SOURCE_ROW_ID_COL and CDC_TYPE_COLUMN_NAME to avoid dropping valid duplicate inserted rows + // and their corresponding CDC rows. 
+ val columnsToDedupeBy = if (notMatchedClauses.nonEmpty) { // insert clause + Seq(TARGET_ROW_ID_COL, SOURCE_ROW_ID_COL, CDC_TYPE_COLUMN_NAME) + } else { + Seq(TARGET_ROW_ID_COL) + } + outputDF = outputDF + .dropDuplicates(columnsToDedupeBy) + .drop(ROW_DROPPED_COL, INCR_ROW_COUNT_COL, TARGET_ROW_ID_COL, SOURCE_ROW_ID_COL) + } else { + outputDF = outputDF.drop(ROW_DROPPED_COL, INCR_ROW_COUNT_COL) + } + + logDebug("writeAllChanges: join output plan:\n" + outputDF.queryExecution) + + // Write to Delta + val newFiles = deltaTxn + .writeFiles(repartitionIfNeeded(spark, outputDF, deltaTxn.metadata.partitionColumns)) + + // Update metrics + val (addedBytes, addedPartitions) = totalBytesAndDistinctPartitionValues(newFiles) + metrics("numTargetFilesAdded") += newFiles.count(_.isInstanceOf[AddFile]) + metrics("numTargetChangeFilesAdded") += newFiles.count(_.isInstanceOf[AddCDCFile]) + metrics("numTargetChangeFileBytes") += newFiles.collect{ case f: AddCDCFile => f.size }.sum + metrics("numTargetBytesAdded") += addedBytes + metrics("numTargetPartitionsAddedTo") += addedPartitions + if (multipleMatchDeleteOnlyOvercount.isDefined) { + // Compensate for counting duplicates during the query. + val actualRowsDeleted = + metrics("numTargetRowsDeleted").value - multipleMatchDeleteOnlyOvercount.get + assert(actualRowsDeleted >= 0) + metrics("numTargetRowsDeleted").set(actualRowsDeleted) + } + + newFiles + } + + private def addMergeJoinProcessor( + spark: SparkSession, + joinedPlan: LogicalPlan, + outputRowSchema: StructType, + targetRowHasNoMatch: Expression, + sourceRowHasNoMatch: Expression, + matchedConditions: Seq[Expression], + matchedOutputs: Seq[Seq[Seq[Expression]]], + notMatchedConditions: Seq[Expression], + notMatchedOutputs: Seq[Seq[Seq[Expression]]], + notMatchedBySourceConditions: Seq[Expression], + notMatchedBySourceOutputs: Seq[Seq[Seq[Expression]]], + noopCopyOutput: Seq[Expression], + deleteRowOutput: Seq[Expression]): Dataset[Row] = { + def wrap(e: Expression): BaseExprMeta[Expression] = { + GpuOverrides.wrapExpr(e, rapidsConf, None) + } + + val targetRowHasNoMatchMeta = wrap(targetRowHasNoMatch) + val sourceRowHasNoMatchMeta = wrap(sourceRowHasNoMatch) + val matchedConditionsMetas = matchedConditions.map(wrap) + val matchedOutputsMetas = matchedOutputs.map(_.map(_.map(wrap))) + val notMatchedConditionsMetas = notMatchedConditions.map(wrap) + val notMatchedOutputsMetas = notMatchedOutputs.map(_.map(_.map(wrap))) + val notMatchedBySourceConditionsMetas = notMatchedBySourceConditions.map(wrap) + val notMatchedBySourceOutputsMetas = notMatchedBySourceOutputs.map(_.map(_.map(wrap))) + val noopCopyOutputMetas = noopCopyOutput.map(wrap) + val deleteRowOutputMetas = deleteRowOutput.map(wrap) + val allMetas = Seq(targetRowHasNoMatchMeta, sourceRowHasNoMatchMeta) ++ + matchedConditionsMetas ++ matchedOutputsMetas.flatten.flatten ++ + notMatchedConditionsMetas ++ notMatchedOutputsMetas.flatten.flatten ++ + notMatchedBySourceConditionsMetas ++ notMatchedBySourceOutputsMetas.flatten.flatten ++ + noopCopyOutputMetas ++ deleteRowOutputMetas + allMetas.foreach(_.tagForGpu()) + val canReplace = allMetas.forall(_.canExprTreeBeReplaced) && rapidsConf.isOperatorEnabled( + "spark.rapids.sql.exec.RapidsProcessDeltaMergeJoinExec", false, false) + if (rapidsConf.shouldExplainAll || (rapidsConf.shouldExplain && !canReplace)) { + val exprExplains = allMetas.map(_.explain(rapidsConf.shouldExplainAll)) + val execWorkInfo = if (canReplace) { + "will run on GPU" + } else { + "cannot run on GPU because not all merge 
processing expressions can be replaced" + } + logWarning(s" $execWorkInfo:\n" + + s" ${exprExplains.mkString(" ")}") + } + + if (canReplace) { + val processedJoinPlan = RapidsProcessDeltaMergeJoin( + joinedPlan, + toAttributes(outputRowSchema), + targetRowHasNoMatch = targetRowHasNoMatch, + sourceRowHasNoMatch = sourceRowHasNoMatch, + matchedConditions = matchedConditions, + matchedOutputs = matchedOutputs, + notMatchedConditions = notMatchedConditions, + notMatchedOutputs = notMatchedOutputs, + notMatchedBySourceConditions = notMatchedBySourceConditions, + notMatchedBySourceOutputs = notMatchedBySourceOutputs, + noopCopyOutput = noopCopyOutput, + deleteRowOutput = deleteRowOutput) + Dataset.ofRows(spark, processedJoinPlan) + } else { + val joinedRowEncoder = ExpressionEncoder(RowEncoder.encoderFor(joinedPlan.schema)) + val outputRowEncoder = ExpressionEncoder(RowEncoder.encoderFor(outputRowSchema)). + resolveAndBind() + + val processor = new JoinedRowProcessor( + targetRowHasNoMatch = targetRowHasNoMatch, + sourceRowHasNoMatch = sourceRowHasNoMatch, + matchedConditions = matchedConditions, + matchedOutputs = matchedOutputs, + notMatchedConditions = notMatchedConditions, + notMatchedOutputs = notMatchedOutputs, + noopCopyOutput = noopCopyOutput, + deleteRowOutput = deleteRowOutput, + joinedAttributes = joinedPlan.output, + joinedRowEncoder = joinedRowEncoder, + outputRowEncoder = outputRowEncoder) + + Dataset.ofRows(spark, joinedPlan).mapPartitions(processor.processPartition)(outputRowEncoder) + } + } + + /** + * Build a new logical plan using the given `files` that has the same output columns (exprIds) + * as the `target` logical plan, so that existing update/insert expressions can be applied + * on this new plan. + */ + private def buildTargetPlanWithFiles( + deltaTxn: OptimisticTransaction, + files: Seq[AddFile]): LogicalPlan = { + val targetOutputCols = getTargetOutputCols(deltaTxn) + val targetOutputColsMap = { + val colsMap: Map[String, NamedExpression] = targetOutputCols.view + .map(col => col.name -> col).toMap + if (conf.caseSensitiveAnalysis) { + colsMap + } else { + CaseInsensitiveMap(colsMap) + } + } + + val plan = { + // We have to do surgery to use the attributes from `targetOutputCols` to scan the table. + // In cases of schema evolution, they may not be the same type as the original attributes. + val original = + deltaTxn.deltaLog.createDataFrame(deltaTxn.snapshot, files).queryExecution.analyzed + val transformed = original.transform { + case LogicalRelation(base, _, catalogTbl, isStreaming) => + LogicalRelation( + base, + // We can ignore the new columns which aren't yet AttributeReferences. 
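+ // (Columns introduced by schema evolution have no attribute in the original target output, so
+ // getTargetOutputCols materializes them as Alias(Literal(null), name) rather than as an
+ // AttributeReference; only the existing attributes are valid in the relation output here.)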
+ targetOutputCols.collect { case a: AttributeReference => a }, + catalogTbl, + isStreaming) + } + + // In case of schema evolution & column mapping, we would also need to rebuild the file format + // because under column mapping, the reference schema within DeltaParquetFileFormat + // that is used to populate metadata needs to be updated + if (deltaTxn.metadata.columnMappingMode != NoMapping) { + val updatedFileFormat = deltaTxn.deltaLog.fileFormat( + deltaTxn.deltaLog.unsafeVolatileSnapshot.protocol, deltaTxn.metadata) + DeltaTableUtils.replaceFileFormat(transformed, updatedFileFormat) + } else { + transformed + } + } + + // For each plan output column, find the corresponding target output column (by name) and + // create an alias + val aliases = plan.output.map { + case newAttrib: AttributeReference => + val existingTargetAttrib = targetOutputColsMap.get(newAttrib.name) + .getOrElse { + throw new AnalysisException( + s"Could not find ${newAttrib.name} among the existing target output " + + targetOutputCols.mkString(",")) + }.asInstanceOf[AttributeReference] + + if (existingTargetAttrib.exprId == newAttrib.exprId) { + // It's not valid to alias an expression to its own exprId (this is considered a + // non-unique exprId by the analyzer), so we just use the attribute directly. + newAttrib + } else { + Alias(newAttrib, existingTargetAttrib.name)(exprId = existingTargetAttrib.exprId) + } + } + + Project(aliases, plan) + } + + /** Expressions to increment SQL metrics */ + private def makeMetricUpdateUDF(name: String, deterministic: Boolean = false): Expression = { + // only capture the needed metric in a local variable + val metric = metrics(name) + var u = udf(new GpuDeltaMetricUpdateUDF(metric)) + if (!deterministic) { + u = u.asNondeterministic() + } + u.apply().expr + } + + private def getTargetOutputCols(txn: OptimisticTransaction): Seq[NamedExpression] = { + txn.metadata.schema.map { col => + targetOutputAttributesMap + .get(col.name) + .map { a => + AttributeReference(col.name, col.dataType, col.nullable)(a.exprId) + } + .getOrElse(Alias(Literal(null), col.name)() + ) + } + } + + /** + * Repartitions the output DataFrame by the partition columns if table is partitioned + * and `merge.repartitionBeforeWrite.enabled` is set to true. + */ + protected def repartitionIfNeeded( + spark: SparkSession, + df: DataFrame, + partitionColumns: Seq[String]): DataFrame = { + if (partitionColumns.nonEmpty && spark.conf.get(DeltaSQLConf.MERGE_REPARTITION_BEFORE_WRITE)) { + df.repartition(partitionColumns.map(col): _*) + } else { + df + } + } + + /** + * Execute the given `thunk` and return its result while recording the time taken to do it. + * + * @param sqlMetricName name of SQL metric to update with the time taken by the thunk + * @param thunk the code to execute + */ + private def recordMergeOperation[A](sqlMetricName: String)(thunk: => A): A = { + val startTimeNs = System.nanoTime() + val r = thunk + val timeTakenMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTimeNs) + if (sqlMetricName != null && timeTakenMs > 0) { + metrics(sqlMetricName) += timeTakenMs + } + r + } +} + +object GpuMergeIntoCommand { + /** + * Spark UI will track all normal accumulators along with Spark tasks to show them on Web UI. + * However, the accumulator used by `MergeIntoCommand` can store a very large value since it + * tracks all files that need to be rewritten. We should ask Spark UI to not remember it, + * otherwise, the UI data may consume lots of memory. 
Hence, we use the prefix `internal.metrics.` + * to make this accumulator become an internal accumulator, so that it will not be tracked by + * Spark UI. + */ + val TOUCHED_FILES_ACCUM_NAME = "internal.metrics.MergeIntoDelta.touchedFiles" + + val ROW_ID_COL = "_row_id_" + val TARGET_ROW_ID_COL = "_target_row_id_" + val SOURCE_ROW_ID_COL = "_source_row_id_" + val FILE_NAME_COL = "_file_name_" + val SOURCE_ROW_PRESENT_COL = "_source_row_present_" + val TARGET_ROW_PRESENT_COL = "_target_row_present_" + val ROW_DROPPED_COL = GpuDeltaMergeConstants.ROW_DROPPED_COL + val INCR_ROW_COUNT_COL = "_incr_row_count_" + + // Some Delta versions use Literal(null) which translates to a literal of NullType instead + // of the Literal(null, StringType) which is needed, so using a fixed version here + // rather than the version from Delta Lake. + val CDC_TYPE_NOT_CDC_LITERAL = Literal(null, StringType) + + /** + * @param targetRowHasNoMatch whether a joined row is a target row with no match in the source + * table + * @param sourceRowHasNoMatch whether a joined row is a source row with no match in the target + * table + * @param matchedConditions condition for each match clause + * @param matchedOutputs corresponding output for each match clause. for each clause, we + * have 1-3 output rows, each of which is a sequence of expressions + * to apply to the joined row + * @param notMatchedConditions condition for each not-matched clause + * @param notMatchedOutputs corresponding output for each not-matched clause. for each clause, + * we have 1-2 output rows, each of which is a sequence of + * expressions to apply to the joined row + * @param noopCopyOutput no-op expression to copy a target row to the output + * @param deleteRowOutput expression to drop a row from the final output. this is used for + * source rows that don't match any not-matched clauses + * @param joinedAttributes schema of our outer-joined dataframe + * @param joinedRowEncoder joinedDF row encoder + * @param outputRowEncoder final output row encoder + */ + class JoinedRowProcessor( + targetRowHasNoMatch: Expression, + sourceRowHasNoMatch: Expression, + matchedConditions: Seq[Expression], + matchedOutputs: Seq[Seq[Seq[Expression]]], + notMatchedConditions: Seq[Expression], + notMatchedOutputs: Seq[Seq[Seq[Expression]]], + noopCopyOutput: Seq[Expression], + deleteRowOutput: Seq[Expression], + joinedAttributes: Seq[Attribute], + joinedRowEncoder: ExpressionEncoder[Row], + outputRowEncoder: ExpressionEncoder[Row]) extends Serializable { + + private def generateProjection(exprs: Seq[Expression]): UnsafeProjection = { + UnsafeProjection.create(exprs, joinedAttributes) + } + + private def generatePredicate(expr: Expression): BasePredicate = { + GeneratePredicate.generate(expr, joinedAttributes) + } + + def processPartition(rowIterator: Iterator[Row]): Iterator[Row] = { + + val targetRowHasNoMatchPred = generatePredicate(targetRowHasNoMatch) + val sourceRowHasNoMatchPred = generatePredicate(sourceRowHasNoMatch) + val matchedPreds = matchedConditions.map(generatePredicate) + val matchedProjs = matchedOutputs.map(_.map(generateProjection)) + val notMatchedPreds = notMatchedConditions.map(generatePredicate) + val notMatchedProjs = notMatchedOutputs.map(_.map(generateProjection)) + val noopCopyProj = generateProjection(noopCopyOutput) + val deleteRowProj = generateProjection(deleteRowOutput) + val outputProj = UnsafeProjection.create(outputRowEncoder.schema) + + // this is accessing ROW_DROPPED_COL. 
If ROW_DROPPED_COL is not in outputRowEncoder.schema + // then CDC must be disabled and it's the column after our output cols + def shouldDeleteRow(row: InternalRow): Boolean = { + row.getBoolean( + outputRowEncoder.schema.getFieldIndex(ROW_DROPPED_COL) + .getOrElse(outputRowEncoder.schema.fields.size) + ) + } + + def processRow(inputRow: InternalRow): Iterator[InternalRow] = { + if (targetRowHasNoMatchPred.eval(inputRow)) { + // Target row did not match any source row, so just copy it to the output + Iterator(noopCopyProj.apply(inputRow)) + } else { + // identify which set of clauses to execute: matched or not-matched ones + val (predicates, projections, noopAction) = if (sourceRowHasNoMatchPred.eval(inputRow)) { + // Source row did not match with any target row, so insert the new source row + (notMatchedPreds, notMatchedProjs, deleteRowProj) + } else { + // Source row matched with target row, so update the target row + (matchedPreds, matchedProjs, noopCopyProj) + } + + // find (predicate, projection) pair whose predicate satisfies inputRow + val pair = (predicates zip projections).find { + case (predicate, _) => predicate.eval(inputRow) + } + + pair match { + case Some((_, projections)) => + projections.map(_.apply(inputRow)).iterator + case None => Iterator(noopAction.apply(inputRow)) + } + } + } + + val toRow = joinedRowEncoder.createSerializer() + val fromRow = outputRowEncoder.createDeserializer() + rowIterator + .map(toRow) + .flatMap(processRow) + .filter(!shouldDeleteRow(_)) + .map { notDeletedInternalRow => + fromRow(outputProj(notDeletedInternalRow)) + } + } + } + + /** Count the number of distinct partition values among the AddFiles in the given set. */ + def totalBytesAndDistinctPartitionValues(files: Seq[FileAction]): (Long, Int) = { + val distinctValues = new mutable.HashSet[Map[String, String]]() + var bytes = 0L + val iter = files.collect { case a: AddFile => a }.iterator + while (iter.hasNext) { + val file = iter.next() + distinctValues += file.partitionValues + bytes += file.size + } + // If the only distinct value map is an empty map, then it must be an unpartitioned table. + // Return 0 in that case. + val numDistinctValues = + if (distinctValues.size == 1 && distinctValues.head.isEmpty) 0 else distinctValues.size + (bytes, numDistinctValues) + } +} diff --git a/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuOptimisticTransaction.scala b/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuOptimisticTransaction.scala new file mode 100644 index 00000000000..e06aba55487 --- /dev/null +++ b/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuOptimisticTransaction.scala @@ -0,0 +1,312 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * This file was derived from OptimisticTransaction.scala and TransactionalWrite.scala + * in the Delta Lake project at https://github.com/delta-io/delta. + * + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.databricks.sql.transaction.tahoe.rapids + +import java.net.URI + +import scala.collection.mutable.ListBuffer + +import com.databricks.sql.transaction.tahoe._ +import com.databricks.sql.transaction.tahoe.actions.{AddFile, FileAction} +import com.databricks.sql.transaction.tahoe.constraints.{Constraint, Constraints} +import com.databricks.sql.transaction.tahoe.schema.InvariantViolationException +import com.databricks.sql.transaction.tahoe.sources.DeltaSQLConf +import com.nvidia.spark.rapids._ +import com.nvidia.spark.rapids.delta._ +import org.apache.commons.lang3.exception.ExceptionUtils +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkException +import org.apache.spark.sql.{DataFrame, Dataset} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} +import org.apache.spark.sql.catalyst.plans.logical.LocalRelation +import org.apache.spark.sql.execution.SQLExecution +import org.apache.spark.sql.execution.datasources.{BasicWriteJobStatsTracker, FileFormatWriter} +import org.apache.spark.sql.functions.to_json +import org.apache.spark.sql.rapids.{BasicColumnarWriteJobStatsTracker, ColumnarWriteJobStatsTracker, GpuFileFormatWriter, GpuWriteJobStatsTracker} +import org.apache.spark.sql.rapids.delta.GpuIdentityColumn +import org.apache.spark.sql.types._ +import org.apache.spark.sql.vectorized.ColumnarBatch +import org.apache.spark.util.{Clock, SerializableConfiguration} + +/** + * Used to perform a set of reads in a transaction and then commit a set of updates to the + * state of the log. All reads from the DeltaLog, MUST go through this instance rather + * than directly to the DeltaLog otherwise they will not be check for logical conflicts + * with concurrent updates. + * + * This class is not thread-safe. + * + * @param deltaLog The Delta Log for the table this transaction is modifying. + * @param snapshot The snapshot that this transaction is reading at. + * @param rapidsConf RAPIDS Accelerator config settings. + */ +class GpuOptimisticTransaction( + deltaLog: DeltaLog, + snapshot: Snapshot, + rapidsConf: RapidsConf)(implicit clock: Clock) + extends GpuOptimisticTransactionBase(deltaLog, snapshot, rapidsConf)(clock) { + + /** Creates a new OptimisticTransaction. + * + * @param deltaLog The Delta Log for the table this transaction is modifying. 
+ * @param rapidsConf RAPIDS Accelerator config settings + */ + def this(deltaLog: DeltaLog, rapidsConf: RapidsConf)(implicit clock: Clock) = { + this(deltaLog, deltaLog.update(), rapidsConf) + } + + private def getGpuStatsColExpr( + statsDataSchema: Seq[Attribute], + statsCollection: GpuStatisticsCollection): Expression = { + Dataset.ofRows(spark, LocalRelation(statsDataSchema)) + .select(to_json(statsCollection.statsCollector)) + .queryExecution.analyzed.expressions.head + } + + /** Return the pair of optional stats tracker and stats collection class */ + private def getOptionalGpuStatsTrackerAndStatsCollection( + output: Seq[Attribute], + partitionSchema: StructType, data: DataFrame): ( + Option[GpuDeltaJobStatisticsTracker], + Option[GpuStatisticsCollection]) = { + if (spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_COLLECT_STATS)) { + + val (statsDataSchema, statsCollectionSchema) = getStatsSchema(output, partitionSchema) + + val indexedCols = DeltaConfigs.DATA_SKIPPING_NUM_INDEXED_COLS.fromMetaData(metadata) + val prefixLength = + spark.sessionState.conf.getConf(DeltaSQLConf.DATA_SKIPPING_STRING_PREFIX_LENGTH) + val tableSchema = { + // If collecting stats using the table schema, then pass in statsCollectionSchema. + // Otherwise pass in statsDataSchema to collect stats using the DataFrame schema. + if (spark.sessionState.conf.getConf(DeltaSQLConf + .DELTA_COLLECT_STATS_USING_TABLE_SCHEMA)) { + statsCollectionSchema.toStructType + } else { + statsDataSchema.toStructType + } + } + + val _spark = spark + val protocol = deltaLog.unsafeVolatileSnapshot.protocol + + val statsCollection = new GpuStatisticsCollection { + override val spark = _spark + override val deletionVectorsSupported = + protocol.isFeatureSupported(DeletionVectorsTableFeature) + override val tableDataSchema = tableSchema + override val dataSchema = statsDataSchema.toStructType + override val numIndexedCols = indexedCols + override val stringPrefixLength: Int = prefixLength + } + + val statsColExpr = getGpuStatsColExpr(statsDataSchema, statsCollection) + + val statsSchema = statsCollection.statCollectionSchema + val explodedDataSchema = statsCollection.explodedDataSchema + val batchStatsToRow = (batch: ColumnarBatch, row: InternalRow) => { + GpuStatisticsCollection.batchStatsToRow(statsSchema, explodedDataSchema, batch, row) + } + (Some(new GpuDeltaJobStatisticsTracker(statsDataSchema, statsColExpr, batchStatsToRow)), + Some(statsCollection)) + } else { + (None, None) + } + } + + override def writeFiles( + inputData: Dataset[_], + writeOptions: Option[DeltaOptions], + additionalConstraints: Seq[Constraint]): Seq[FileAction] = { + hasWritten = true + + val spark = inputData.sparkSession + val (data, partitionSchema) = performCDCPartition(inputData) + val outputPath = deltaLog.dataPath + + val (normalizedQueryExecution, output, generatedColumnConstraints, dataHighWaterMarks) = { + // TODO: is none ok to pass here? + normalizeData(deltaLog, None, data) + } + val highWaterMarks = trackHighWaterMarks.getOrElse(dataHighWaterMarks) + + // Build a new plan with a stub GpuDeltaWrite node to work around undesired transitions between + // columns and rows when AQE is involved. Without this node in the plan, AdaptiveSparkPlanExec + // could be the root node of the plan. In that case we do not have enough context to know + // whether the AdaptiveSparkPlanExec should be columnar or not, since the GPU overrides do not + // see how the parent is using the AdaptiveSparkPlanExec outputs. 
By using this stub node that + // appears to be a data writing node to AQE (it derives from V2CommandExec), the + // AdaptiveSparkPlanExec will be planned as a child of this new node. That provides enough + // context to plan the AQE sub-plan properly with respect to columnar and row transitions. + // We could force the AQE node to be columnar here by explicitly replacing the node, but that + // breaks the connection between the queryExecution and the node that will actually execute. + val gpuWritePlan = Dataset.ofRows(spark, RapidsDeltaWrite(normalizedQueryExecution.logical)) + val queryExecution = gpuWritePlan.queryExecution + + val partitioningColumns = getPartitioningColumns(partitionSchema, output) + + val committer = getCommitter(outputPath) + + // If Statistics Collection is enabled, then create a stats tracker that will be injected during + // the FileFormatWriter.write call below and will collect per-file stats using + // StatisticsCollection + val (optionalStatsTracker, _) = getOptionalGpuStatsTrackerAndStatsCollection(output, + partitionSchema, data) + + // schema should be normalized, therefore we can do an equality check + val (statsDataSchema, _) = getStatsSchema(output, partitionSchema) + val identityTracker = GpuIdentityColumn.createIdentityColumnStatsTracker( + spark, + statsDataSchema, + metadata.schema, + highWaterMarks) + + val constraints = + Constraints.getAll(metadata, spark) ++ generatedColumnConstraints ++ additionalConstraints + + val isOptimize = isOptimizeCommand(queryExecution.analyzed) + + SQLExecution.withNewExecutionId(queryExecution, Option("deltaTransactionalWrite")) { + val outputSpec = FileFormatWriter.OutputSpec( + outputPath.toString, + Map.empty, + output) + + // Remove any unnecessary row conversions added as part of Spark planning + val queryPhysicalPlan = queryExecution.executedPlan match { + case GpuColumnarToRowExec(child, _) => child + case p => p + } + val gpuRapidsWrite = queryPhysicalPlan match { + case g: GpuRapidsDeltaWriteExec => Some(g) + case _ => None + } + + val empty2NullPlan = convertEmptyToNullIfNeeded(queryPhysicalPlan, + partitioningColumns, constraints) + val optimizedPlan = + applyOptimizeWriteIfNeeded(spark, empty2NullPlan, partitionSchema, isOptimize) + val planWithInvariants = addInvariantChecks(optimizedPlan, constraints) + val physicalPlan = convertToGpu(planWithInvariants) + + val statsTrackers: ListBuffer[ColumnarWriteJobStatsTracker] = ListBuffer() + + val hadoopConf = spark.sessionState.newHadoopConfWithOptions( + metadata.configuration ++ deltaLog.options) + + if (spark.conf.get(DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED)) { + val serializableHadoopConf = new SerializableConfiguration(hadoopConf) + val basicWriteJobStatsTracker = new BasicColumnarWriteJobStatsTracker( + serializableHadoopConf, + BasicWriteJobStatsTracker.metrics) + registerSQLMetrics(spark, basicWriteJobStatsTracker.driverSideMetrics) + statsTrackers.append(basicWriteJobStatsTracker) + gpuRapidsWrite.foreach { grw => + val tracker = new GpuWriteJobStatsTracker(serializableHadoopConf, + grw.basicMetrics, grw.taskMetrics) + statsTrackers.append(tracker) + } + } + + // Retain only a minimal selection of Spark writer options to avoid any potential + // compatibility issues + val options = writeOptions match { + case None => Map.empty[String, String] + case Some(writeOptions) => + writeOptions.options.filterKeys { key => + key.equalsIgnoreCase(DeltaOptions.MAX_RECORDS_PER_FILE) || + key.equalsIgnoreCase(DeltaOptions.COMPRESSION) + }.toMap + } + val 
deltaFileFormat = deltaLog.fileFormat(deltaLog.unsafeVolatileSnapshot.protocol, metadata) + val gpuFileFormat = if (deltaFileFormat.getClass == classOf[DeltaParquetFileFormat]) { + new GpuParquetFileFormat + } else { + throw new IllegalStateException(s"file format $deltaFileFormat is not supported") + } + + try { + logDebug(s"Physical plan for write:\n$physicalPlan") + GpuFileFormatWriter.write( + sparkSession = spark, + plan = physicalPlan, + fileFormat = gpuFileFormat, + committer = committer, + outputSpec = outputSpec, + hadoopConf = hadoopConf, + partitionColumns = partitioningColumns, + bucketSpec = None, + statsTrackers = optionalStatsTracker.toSeq ++ identityTracker.toSeq ++ statsTrackers, + options = options, + rapidsConf.stableSort, + rapidsConf.concurrentWriterPartitionFlushSize) + } catch { + case s: SparkException => + // Pull an InvariantViolationException up to the top level if it was the root cause. + val violationException = ExceptionUtils.getRootCause(s) + if (violationException.isInstanceOf[InvariantViolationException]) { + throw violationException + } else { + throw s + } + } + } + + val resultFiles = committer.addedStatuses.map { a => + a.copy(stats = optionalStatsTracker.map( + _.recordedStats(new Path(new URI(a.path)).getName)).getOrElse(a.stats)) + }.filter { + // In some cases, we can write out an empty `inputData`. Some examples of this (though, they + // may be fixed in the future) are the MERGE command when you delete with empty source, or + // empty target, or on disjoint tables. This is hard to catch before the write without + // collecting the DF ahead of time. Instead, we can return only the AddFiles that + // a) actually add rows, or + // b) don't have any stats so we don't know the number of rows at all + case a: AddFile => a.numLogicalRecords.forall(_ > 0) + case _ => true + } + + identityTracker.foreach { tracker => + updatedIdentityHighWaterMarks.appendAll(tracker.highWaterMarks.toSeq) + } + val fileActions = resultFiles.toSeq ++ committer.changeFiles + + // Check if auto-compaction is enabled. + // (Auto compaction checks are derived from the work in + // https://github.com/delta-io/delta/pull/1156). + lazy val autoCompactEnabled = + spark.sessionState.conf + .getConf[String](DeltaSQLConf.DELTA_AUTO_COMPACT_ENABLED) + .getOrElse { + DeltaConfigs.AUTO_COMPACT.fromMetaData(metadata) + .getOrElse("false") + }.toBoolean + + if (!isOptimize && autoCompactEnabled && fileActions.nonEmpty) { + registerPostCommitHook(GpuDoAutoCompaction) + } + + fileActions + } +} diff --git a/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuOptimisticTransactionBase.scala b/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuOptimisticTransactionBase.scala new file mode 100644 index 00000000000..40e2651505c --- /dev/null +++ b/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuOptimisticTransactionBase.scala @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * This file was derived from OptimisticTransaction.scala and TransactionalWrite.scala + * in the Delta Lake project at https://github.com/delta-io/delta. + * + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.databricks.sql.transaction.tahoe.rapids + +import com.databricks.sql.transaction.tahoe._ +import com.databricks.sql.transaction.tahoe.actions.FileAction +import com.databricks.sql.transaction.tahoe.constraints.{Constraint, DeltaInvariantCheckerExec} +import com.databricks.sql.transaction.tahoe.files.TahoeBatchFileIndex +import com.databricks.sql.transaction.tahoe.metering.DeltaLogging +import com.databricks.sql.transaction.tahoe.sources.DeltaSQLConf +import com.nvidia.spark.rapids._ + +import org.apache.spark.sql.{Dataset, SparkSession} +import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet, NamedExpression} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} +import org.apache.spark.sql.rapids.GpuShuffleEnv +import org.apache.spark.sql.rapids.GpuV1WriteUtils.GpuEmpty2Null +import org.apache.spark.sql.rapids.delta.{DeltaShufflePartitionsUtil, GpuOptimizeWriteExchangeExec, OptimizeWriteExchangeExec} +import org.apache.spark.sql.types.{StringType, StructType} +import org.apache.spark.util.Clock + +/** + * Used to perform a set of reads in a transaction and then commit a set of updates to the + * state of the log. All reads from the DeltaLog, MUST go through this instance rather + * than directly to the DeltaLog otherwise they will not be check for logical conflicts + * with concurrent updates. + * + * This class is not thread-safe. + * + * @param deltaLog The Delta Log for the table this transaction is modifying. + * @param snapshot The snapshot that this transaction is reading at. + * @param rapidsConf RAPIDS Accelerator config settings. 
+ */ +abstract class GpuOptimisticTransactionBase + (deltaLog: DeltaLog, snapshot: Snapshot, val rapidsConf: RapidsConf) + (implicit clock: Clock) + extends OptimisticTransaction(deltaLog, Option.empty[CatalogTable], snapshot) + with DeltaLogging { + + /** + * Adds checking of constraints on the table + * @param plan Plan to generate the table to check against constraints + * @param constraints Constraints to check on the table + * @return GPU columnar plan to execute + */ + protected def addInvariantChecks(plan: SparkPlan, constraints: Seq[Constraint]): SparkPlan = { + val cpuInvariants = + DeltaInvariantCheckerExec.buildInvariantChecks(plan.output, constraints, plan.session) + GpuCheckDeltaInvariant.maybeConvertToGpu(cpuInvariants, rapidsConf) match { + case Some(gpuInvariants) => + val gpuPlan = convertToGpu(plan) + GpuDeltaInvariantCheckerExec(gpuPlan, gpuInvariants) + case None => + val cpuPlan = convertToCpu(plan) + DeltaInvariantCheckerExec(cpuPlan, constraints) + } + } + + /** GPU version of convertEmptyToNullIfNeeded */ + private def gpuConvertEmptyToNullIfNeeded( + plan: GpuExec, + partCols: Seq[Attribute], + constraints: Seq[Constraint]): SparkPlan = { + if (!spark.conf.get(DeltaSQLConf.CONVERT_EMPTY_TO_NULL_FOR_STRING_PARTITION_COL)) { + return plan + } + // No need to convert if there are no constraints. The empty strings will be converted later by + // FileFormatWriter and FileFormatDataWriter. Note that we might still do unnecessary convert + // here as the constraints might not be related to the string partition columns. A precise + // check will need to walk the constraints to see if such columns are really involved. It + // doesn't seem to worth the effort. + if (constraints.isEmpty) return plan + + val partSet = AttributeSet(partCols) + var needConvert = false + val projectList: Seq[NamedExpression] = plan.output.map { + case p if partSet.contains(p) && p.dataType == StringType => + needConvert = true + GpuAlias(GpuEmpty2Null(p), p.name)() + case attr => attr + } + if (needConvert) GpuProjectExec(projectList.toList, plan) else plan + } + + /** + * If there is any string partition column and there are constraints defined, add a projection to + * convert empty string to null for that column. The empty strings will be converted to null + * eventually even without this convert, but we want to do this earlier before check constraints + * so that empty strings are correctly rejected. Note that this should not cause the downstream + * logic in `FileFormatWriter` to add duplicate conversions because the logic there checks the + * partition column using the original plan's output. When the plan is modified with additional + * projections, the partition column check won't match and will not add more conversion. + * + * @param plan The original SparkPlan. + * @param partCols The partition columns. + * @param constraints The defined constraints. + * @return A SparkPlan potentially modified with an additional projection on top of `plan` + */ + override def convertEmptyToNullIfNeeded( + plan: SparkPlan, + partCols: Seq[Attribute], + constraints: Seq[Constraint]): SparkPlan = { + // Reuse the CPU implementation if the plan ends up on the CPU, otherwise do the + // equivalent on the GPU. 
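+    // The GPU path (gpuConvertEmptyToNullIfNeeded above) wraps string partition columns in
+    // GpuEmpty2Null behind a GpuProjectExec, and only when constraints are present and the
+    // conversion is enabled; any non-GPU plan falls through to the Delta CPU implementation.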
+ plan match { + case g: GpuExec => gpuConvertEmptyToNullIfNeeded(g, partCols, constraints) + case _ => super.convertEmptyToNullIfNeeded(plan, partCols, constraints) + } + } + + override def writeFiles( + inputData: Dataset[_], + additionalConstraints: Seq[Constraint]): Seq[FileAction] = { + writeFiles(inputData, None, additionalConstraints) + } + + protected def applyOptimizeWriteIfNeeded( + spark: SparkSession, + physicalPlan: SparkPlan, + partitionSchema: StructType, + isOptimize: Boolean): SparkPlan = { + val optimizeWriteEnabled = !isOptimize && + spark.sessionState.conf.getConf(DeltaSQLConf.DELTA_OPTIMIZE_WRITE_ENABLED) + .orElse(DeltaConfigs.OPTIMIZE_WRITE.fromMetaData(metadata)).getOrElse(false) + if (optimizeWriteEnabled) { + val planWithoutTopRepartition = + DeltaShufflePartitionsUtil.removeTopRepartition(physicalPlan) + val partitioning = DeltaShufflePartitionsUtil.partitioningForRebalance( + physicalPlan.output, partitionSchema, spark.sessionState.conf.numShufflePartitions) + planWithoutTopRepartition match { + case p: GpuExec => + val partMeta = GpuOverrides.wrapPart(partitioning, rapidsConf, None) + partMeta.tagForGpu() + if (partMeta.canThisBeReplaced) { + val plan = GpuOptimizeWriteExchangeExec(partMeta.convertToGpu(), p) + if (GpuShuffleEnv.useGPUShuffle(rapidsConf)) { + GpuCoalesceBatches(plan, TargetSize(rapidsConf.gpuTargetBatchSizeBytes)) + } else { + GpuShuffleCoalesceExec(plan, rapidsConf.gpuTargetBatchSizeBytes) + } + } else { + GpuColumnarToRowExec(OptimizeWriteExchangeExec(partitioning, p)) + } + case p => + OptimizeWriteExchangeExec(partitioning, p) + } + } else { + physicalPlan + } + } + + protected def isOptimizeCommand(plan: LogicalPlan): Boolean = { + val leaves = plan.collectLeaves() + leaves.size == 1 && leaves.head.collect { + case LogicalRelation(HadoopFsRelation( + index: TahoeBatchFileIndex, _, _, _, _, _), _, _, _) => + index.actionType.equals("Optimize") + }.headOption.getOrElse(false) + } + + protected def convertToCpu(plan: SparkPlan): SparkPlan = plan match { + case GpuRowToColumnarExec(p, _) => p + case p: GpuExec => GpuColumnarToRowExec(p) + case p => p + } + + protected def convertToGpu(plan: SparkPlan): SparkPlan = plan match { + case GpuColumnarToRowExec(p, _) => p + case p: GpuExec => p + case p => GpuRowToColumnarExec(p, TargetSize(rapidsConf.gpuTargetBatchSizeBytes)) + } +} diff --git a/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuOptimizeExecutor.scala b/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuOptimizeExecutor.scala new file mode 100644 index 00000000000..479776b760b --- /dev/null +++ b/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuOptimizeExecutor.scala @@ -0,0 +1,420 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * This file was derived from: + * 1. DoAutoCompaction.scala from PR#1156 at https://github.com/delta-io/delta/pull/1156, + * 2. OptimizeTableCommand.scala from the Delta Lake project at https://github.com/delta-io/delta. + * + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.databricks.sql.transaction.tahoe.rapids + +import java.util.ConcurrentModificationException + +import scala.annotation.tailrec +import scala.collection.mutable.ArrayBuffer + +import com.databricks.sql.io.skipping.MultiDimClustering +import com.databricks.sql.io.skipping.liquid.{ClusteredTableUtils, ClusteringColumnInfo} +import com.databricks.sql.transaction.tahoe._ +import com.databricks.sql.transaction.tahoe.DeltaOperations.Operation +import com.databricks.sql.transaction.tahoe.actions.{Action, AddFile, FileAction, RemoveFile} +import com.databricks.sql.transaction.tahoe.commands.DeltaCommand +import com.databricks.sql.transaction.tahoe.commands.optimize._ +import com.databricks.sql.transaction.tahoe.files.SQLMetricsReporting +import com.databricks.sql.transaction.tahoe.sources.DeltaSQLConf +import com.nvidia.spark.rapids.delta.RapidsDeltaSQLConf + +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext.SPARK_JOB_GROUP_ID +import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.sql.catalyst.catalog.CatalogTable +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.execution.metric.SQLMetric +import org.apache.spark.sql.execution.metric.SQLMetrics.createMetric +import org.apache.spark.util.ThreadUtils + +class GpuOptimizeExecutor( + sparkSession: SparkSession, + txn: OptimisticTransaction, + partitionPredicate: Seq[Expression], + zOrderByColumns: Seq[String], + prevCommitActions: Seq[Action]) + extends DeltaCommand with SQLMetricsReporting with Serializable { + + /** Timestamp to use in [[FileAction]] */ + private val operationTimestamp = System.currentTimeMillis + + private val isMultiDimClustering = zOrderByColumns.nonEmpty + private val isAutoCompact = prevCommitActions.nonEmpty + private val optimizeType = GpuOptimizeType(isMultiDimClustering, isAutoCompact) + + def optimize(): Seq[Row] = { + recordDeltaOperation(txn.deltaLog, "delta.optimize") { + val maxFileSize = optimizeType.maxFileSize + require(maxFileSize > 0, "maxFileSize must be > 0") + + val minNumFilesInDir = optimizeType.minNumFiles + val (candidateFiles, filesToProcess) = optimizeType.targetFiles + val partitionSchema = txn.metadata.partitionSchema + + // select all files in case of multi-dimensional clustering + val partitionsToCompact = filesToProcess + .groupBy(_.partitionValues) + .filter { case (_, filesInPartition) => filesInPartition.size >= minNumFilesInDir } + .toSeq + + val groupedJobs = groupFilesIntoBins(partitionsToCompact, maxFileSize) + val jobs = optimizeType.targetBins(groupedJobs) + + val maxThreads = + sparkSession.sessionState.conf.getConf(DeltaSQLConf.DELTA_OPTIMIZE_MAX_THREADS) + val updates = ThreadUtils.parmap(jobs, "OptimizeJob", maxThreads) { partitionBinGroup => + runOptimizeBinJob(txn, partitionBinGroup._1, partitionBinGroup._2, maxFileSize) + }.flatten + + val addedFiles = updates.collect { case a: AddFile => a } + val removedFiles = updates.collect { case r: RemoveFile => r } + if (addedFiles.nonEmpty) { + val operation = DeltaOperations.Optimize(partitionPredicate, zOrderByColumns) + 
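+        // createMetrics (defined at the bottom of this class) captures min/percentile/max output
+        // file sizes plus the added/removed file and byte counts recorded in the commit history.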
val metrics = createMetrics(sparkSession.sparkContext, addedFiles, removedFiles) + commitAndRetry(txn, operation, updates, metrics) { newTxn => + val newPartitionSchema = newTxn.metadata.partitionSchema + val candidateSetOld = candidateFiles.map(_.path).toSet + val candidateSetNew = newTxn.filterFiles(partitionPredicate).map(_.path).toSet + + // As long as all of the files that we compacted are still part of the table, + // and the partitioning has not changed it is valid to continue to try + // and commit this checkpoint. + if (candidateSetOld.subsetOf(candidateSetNew) && partitionSchema == newPartitionSchema) { + true + } else { + val deleted = candidateSetOld -- candidateSetNew + logWarning(s"The following compacted files were delete " + + s"during checkpoint ${deleted.mkString(",")}. Aborting the compaction.") + false + } + } + } + + val optimizeStats = OptimizeStats() + optimizeStats.addedFilesSizeStats.merge(addedFiles) + optimizeStats.removedFilesSizeStats.merge(removedFiles) + optimizeStats.numPartitionsOptimized = jobs.map(j => j._1).distinct.size + optimizeStats.numBatches = jobs.size + optimizeStats.totalConsideredFiles = candidateFiles.size + optimizeStats.totalFilesSkipped = optimizeStats.totalConsideredFiles - removedFiles.size + optimizeStats.totalClusterParallelism = sparkSession.sparkContext.defaultParallelism + + if (isMultiDimClustering) { + val inputFileStats = + ZOrderFileStats(removedFiles.size, removedFiles.map(_.size.getOrElse(0L)).sum) + optimizeStats.zOrderStats = Some(ZOrderStats( + strategyName = "all", // means process all files in a partition + inputCubeFiles = ZOrderFileStats(0, 0), + inputOtherFiles = inputFileStats, + inputNumCubes = 0, + mergedFiles = inputFileStats, + // There will one z-cube for each partition + numOutputCubes = optimizeStats.numPartitionsOptimized)) + } + + return Seq(Row(txn.deltaLog.dataPath.toString, optimizeStats.toOptimizeMetrics)) + } + } + + /** + * Utility methods to group files into bins for optimize. + * + * @param partitionsToCompact List of files to compact group by partition. + * Partition is defined by the partition values (partCol -> partValue) + * @param maxTargetFileSize Max size (in bytes) of the compaction output file. + * @return Sequence of bins. Each bin contains one or more files from the same + * partition and targeted for one output file. + */ + private def groupFilesIntoBins( + partitionsToCompact: Seq[(Map[String, String], Seq[AddFile])], + maxTargetFileSize: Long): Seq[(Map[String, String], Seq[AddFile])] = { + + partitionsToCompact.flatMap { + case (partition, files) => + val bins = new ArrayBuffer[Seq[AddFile]]() + + val currentBin = new ArrayBuffer[AddFile]() + var currentBinSize = 0L + + files.sortBy(_.size).foreach { file => + // Generally, a bin is a group of existing files, whose total size does not exceed the + // desired maxFileSize. They will be coalesced into a single output file. + // However, if isMultiDimClustering = true, all files in a partition will be read by the + // same job, the data will be range-partitioned and numFiles = totalFileSize / maxFileSize + // will be produced. See below. 
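+          // For example (illustrative numbers): with maxTargetFileSize = 100 MB and four 40 MB
+          // files sorted ascending, the third file would push the running bin to 120 MB, so the
+          // first bin is flushed with two files (80 MB) and the remaining two files form a second
+          // 80 MB bin; both bins then survive the at-least-two-files filter below.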
+ if (file.size + currentBinSize > maxTargetFileSize && !isMultiDimClustering) { + bins += currentBin.toVector + currentBin.clear() + currentBin += file + currentBinSize = file.size + } else { + currentBin += file + currentBinSize += file.size + } + } + + if (currentBin.nonEmpty) { + bins += currentBin.toVector + } + + bins.map(b => (partition, b)) + // select bins that have at least two files or in case of multi-dim clustering + // select all bins + .filter(_._2.size > 1 || isMultiDimClustering) + } + } + + private val isClusteredTable = ClusteredTableUtils.isSupported(txn.snapshot.protocol) + + private val clusteringColumns: Seq[String] = { + if (zOrderByColumns.nonEmpty) { + zOrderByColumns + } else if (isClusteredTable) { + ClusteringColumnInfo.extractLogicalNames(txn.snapshot) + } else { + Nil + } + } + + /** + * Utility method to run a Spark job to compact the files in given bin + * + * @param txn [[OptimisticTransaction]] instance in use to commit the changes to DeltaLog. + * @param partition Partition values of the partition that files in [[bin]] belongs to. + * @param bin List of files to compact into one large file. + * @param maxFileSize Targeted output file size in bytes + */ + private def runOptimizeBinJob( + txn: OptimisticTransaction, + partition: Map[String, String], + bin: Seq[AddFile], + maxFileSize: Long): Seq[FileAction] = { + val baseTablePath = txn.deltaLog.dataPath + + val input = txn.deltaLog.createDataFrame(txn.snapshot, bin, actionTypeOpt = Some("Optimize")) + val repartitionDF = if (isMultiDimClustering) { + val totalSize = bin.map(_.size).sum + val approxNumFiles = Math.max(1, totalSize / maxFileSize).toInt + MultiDimClustering.cluster( + input, + approxNumFiles, + clusteringColumns, + "zorder") + } else { + val useRepartition = sparkSession.sessionState.conf.getConf( + DeltaSQLConf.DELTA_OPTIMIZE_REPARTITION_ENABLED) + if (useRepartition) { + input.repartition(numPartitions = 1) + } else { + input.coalesce(numPartitions = 1) + } + } + + val partitionDesc = partition.toSeq.map(entry => entry._1 + "=" + entry._2).mkString(",") + + val partitionName = if (partition.isEmpty) "" else s" in partition ($partitionDesc)" + val description = s"$baseTablePath
Optimizing ${bin.size} files" + partitionName + sparkSession.sparkContext.setJobGroup( + sparkSession.sparkContext.getLocalProperty(SPARK_JOB_GROUP_ID), + description) + + val addFiles = txn.writeFiles(repartitionDF).collect { + case a: AddFile => + a.copy(dataChange = false) + case other => + throw new IllegalStateException( + s"Unexpected action $other with type ${other.getClass}. File compaction job output" + + s"should only have AddFiles") + } + val removeFiles = bin.map(f => f.removeWithTimestamp(operationTimestamp, dataChange = false)) + val updates = addFiles ++ removeFiles + updates + } + + private type PartitionedBin = (Map[String, String], Seq[AddFile]) + + private trait GpuOptimizeType { + def minNumFiles: Long + + def maxFileSize: Long = + sparkSession.sessionState.conf.getConf(DeltaSQLConf.DELTA_OPTIMIZE_MAX_FILE_SIZE) + + def targetFiles: (Seq[AddFile], Seq[AddFile]) + + def targetBins(jobs: Seq[PartitionedBin]): Seq[PartitionedBin] = jobs + } + + private case class GpuCompaction() extends GpuOptimizeType { + def minNumFiles: Long = 2 + + def targetFiles: (Seq[AddFile], Seq[AddFile]) = { + val minFileSize = sparkSession.sessionState.conf.getConf( + DeltaSQLConf.DELTA_OPTIMIZE_MIN_FILE_SIZE) + require(minFileSize > 0, "minFileSize must be > 0") + val candidateFiles = txn.filterFiles(partitionPredicate) + val filesToProcess = candidateFiles.filter(_.size < minFileSize) + (candidateFiles, filesToProcess) + } + } + + private case class GpuMultiDimOrdering() extends GpuOptimizeType { + def minNumFiles: Long = 1 + + def targetFiles: (Seq[AddFile], Seq[AddFile]) = { + // select all files in case of multi-dimensional clustering + val candidateFiles = txn.filterFiles(partitionPredicate) + (candidateFiles, candidateFiles) + } + } + + private case class GpuAutoCompaction() extends GpuOptimizeType { + def minNumFiles: Long = { + val minNumFiles = + sparkSession.sessionState.conf.getConf(DeltaSQLConf.DELTA_AUTO_COMPACT_MIN_NUM_FILES) + require(minNumFiles > 0, "minNumFiles must be > 0") + minNumFiles + } + + override def maxFileSize: Long = + sparkSession.sessionState.conf.getConf(DeltaSQLConf.DELTA_AUTO_COMPACT_MAX_FILE_SIZE) + .getOrElse(128 * 1024 * 1024) + + override def targetFiles: (Seq[AddFile], Seq[AddFile]) = { + val autoCompactTarget = + sparkSession.sessionState.conf.getConf(RapidsDeltaSQLConf.AUTO_COMPACT_TARGET) + // Filter the candidate files according to autoCompact.target config. + lazy val addedFiles = prevCommitActions.collect { case a: AddFile => a } + val candidateFiles = autoCompactTarget match { + case "table" => + txn.filterFiles() + case "commit" => + addedFiles + case "partition" => + val eligiblePartitions = addedFiles.map(_.partitionValues).toSet + txn.filterFiles().filter(f => eligiblePartitions.contains(f.partitionValues)) + case _ => + logError(s"Invalid config for autoCompact.target: $autoCompactTarget. " + + s"Falling back to the default value 'table'.") + txn.filterFiles() + } + val filesToProcess = candidateFiles.filter(_.size < maxFileSize) + (candidateFiles, filesToProcess) + } + + override def targetBins(jobs: Seq[PartitionedBin]): Seq[PartitionedBin] = { + var acc = 0L + val maxCompactBytes = + sparkSession.sessionState.conf.getConf(RapidsDeltaSQLConf.AUTO_COMPACT_MAX_COMPACT_BYTES) + // bins with more files are prior to less files. 
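+      // i.e. bins holding more files are compacted first; the running byte total `acc` is
+      // updated as a side effect inside takeWhile, so the first bin that pushes it past
+      // maxCompactBytes is excluded along with every bin after it.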
+ jobs + .sortBy { case (_, filesInBin) => -filesInBin.length } + .takeWhile { case (_, filesInBin) => + acc += filesInBin.map(_.size).sum + acc <= maxCompactBytes + } + } + } + + private object GpuOptimizeType { + + def apply(isMultiDimClustering: Boolean, isAutoCompact: Boolean): GpuOptimizeType = { + if (isMultiDimClustering) { + GpuMultiDimOrdering() + } else if (isAutoCompact) { + GpuAutoCompaction() + } else { + GpuCompaction() + } + } + } + + /** + * Attempts to commit the given actions to the log. In the case of a concurrent update, + * the given function will be invoked with a new transaction to allow custom conflict + * detection logic to indicate it is safe to try again, by returning `true`. + * + * This function will continue to try to commit to the log as long as `f` returns `true`, + * otherwise throws a subclass of [[ConcurrentModificationException]]. + */ + @tailrec + private def commitAndRetry( + txn: OptimisticTransaction, + optimizeOperation: Operation, + actions: Seq[Action], + metrics: Map[String, SQLMetric])(f: OptimisticTransaction => Boolean) + : Unit = { + try { + txn.registerSQLMetrics(sparkSession, metrics) + txn.commit(actions, optimizeOperation) + } catch { + case e: ConcurrentModificationException => + val newTxn = txn.deltaLog.startTransaction(Option.empty[CatalogTable]) + if (f(newTxn)) { + logInfo("Retrying commit after checking for semantic conflicts with concurrent updates.") + commitAndRetry(newTxn, optimizeOperation, actions, metrics)(f) + } else { + logWarning("Semantic conflicts detected. Aborting operation.") + throw e + } + } + } + + /** Create a map of SQL metrics for adding to the commit history. */ + private def createMetrics( + sparkContext: SparkContext, + addedFiles: Seq[AddFile], + removedFiles: Seq[RemoveFile]): Map[String, SQLMetric] = { + + def setAndReturnMetric(description: String, value: Long) = { + val metric = createMetric(sparkContext, description) + metric.set(value) + metric + } + + def totalSize(actions: Seq[FileAction]): Long = { + var totalSize = 0L + actions.foreach { file => + val fileSize = file match { + case addFile: AddFile => addFile.size + case removeFile: RemoveFile => removeFile.size.getOrElse(0L) + case default => + throw new IllegalArgumentException(s"Unknown FileAction type: ${default.getClass}") + } + totalSize += fileSize + } + totalSize + } + + val sizeStats = FileSizeStatsWithHistogram.create(addedFiles.map(_.size).sorted) + Map[String, SQLMetric]( + "minFileSize" -> setAndReturnMetric("minimum file size", sizeStats.get.min), + "p25FileSize" -> setAndReturnMetric("25th percentile file size", sizeStats.get.p25), + "p50FileSize" -> setAndReturnMetric("50th percentile file size", sizeStats.get.p50), + "p75FileSize" -> setAndReturnMetric("75th percentile file size", sizeStats.get.p75), + "maxFileSize" -> setAndReturnMetric("maximum file size", sizeStats.get.max), + "numAddedFiles" -> setAndReturnMetric("total number of files added.", addedFiles.size), + "numRemovedFiles" -> setAndReturnMetric("total number of files removed.", removedFiles.size), + "numAddedBytes" -> setAndReturnMetric("total number of bytes added", totalSize(addedFiles)), + "numRemovedBytes" -> + setAndReturnMetric("total number of bytes removed", totalSize(removedFiles))) + } +} diff --git a/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuUpdateCommand.scala b/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuUpdateCommand.scala new file mode 100644 index 
00000000000..b158062cf60 --- /dev/null +++ b/delta-lake/delta-spark350db143/src/main/scala/com/databricks/sql/transaction/tahoe/rapids/GpuUpdateCommand.scala @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * This file was derived from UpdateCommand.scala + * in the Delta Lake project at https://github.com/delta-io/delta. + * + * Copyright (2021) The Delta Lake Project Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.databricks.sql.transaction.tahoe.rapids + +import com.databricks.sql.transaction.tahoe.{DeltaLog, DeltaOperations, DeltaTableUtils, DeltaUDF, OptimisticTransaction} +import com.databricks.sql.transaction.tahoe.DeltaCommitTag._ +import com.databricks.sql.transaction.tahoe.RowTracking +import com.databricks.sql.transaction.tahoe.actions.{AddCDCFile, AddFile, FileAction} +import com.databricks.sql.transaction.tahoe.commands.{DeltaCommand, DMLUtils, UpdateCommand, UpdateMetric} +import com.databricks.sql.transaction.tahoe.files.{TahoeBatchFileIndex, TahoeFileIndex} +import com.nvidia.spark.rapids.delta.GpuDeltaMetricUpdateUDF +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkContext +import org.apache.spark.sql.{Column, Dataset, Row, SparkSession} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, Literal} +import org.apache.spark.sql.catalyst.plans.QueryPlan +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.execution.SQLExecution +import org.apache.spark.sql.execution.command.LeafRunnableCommand +import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} +import org.apache.spark.sql.execution.metric.SQLMetrics.{createMetric, createTimingMetric} +import org.apache.spark.sql.functions.input_file_name +import org.apache.spark.sql.types.LongType + +case class GpuUpdateCommand( + gpuDeltaLog: GpuDeltaLog, + tahoeFileIndex: TahoeFileIndex, + target: LogicalPlan, + updateExpressions: Seq[Expression], + condition: Option[Expression]) + extends LeafRunnableCommand with DeltaCommand { + + override val output: Seq[Attribute] = { + Seq(AttributeReference("num_affected_rows", LongType)()) + } + + override def innerChildren: Seq[QueryPlan[_]] = Seq(target) + + @transient private lazy val sc: SparkContext = SparkContext.getOrCreate() + + override lazy val metrics = Map[String, SQLMetric]( + "numAddedFiles" -> createMetric(sc, "number of files added."), + "numRemovedFiles" -> createMetric(sc, "number of files removed."), + "numUpdatedRows" -> createMetric(sc, "number of rows updated."), + "numCopiedRows" -> createMetric(sc, "number of rows copied."), + "executionTimeMs" -> + createTimingMetric(sc, "time taken to execute the entire operation"), + "scanTimeMs" -> + createTimingMetric(sc, "time taken to scan the files for matches"), + "rewriteTimeMs" -> + createTimingMetric(sc, "time taken to rewrite the matched files"), + "numAddedChangeFiles" -> createMetric(sc, "number of change data capture files generated"), + 
"changeFileBytes" -> createMetric(sc, "total size of change data capture files generated"), + "numTouchedRows" -> createMetric(sc, "number of rows touched (copied + updated)"), + "numDeletionVectorsAdded" -> createMetric(sc, "number of deletion vectors added."), + "numDeletionVectorsRemoved" -> createMetric(sc, "number of deletion vectors removed."), + "numDeletionVectorsUpdated" -> createMetric(sc, "number of deletion vectors updated.") + ) + + final override def run(sparkSession: SparkSession): Seq[Row] = { + recordDeltaOperation(tahoeFileIndex.deltaLog, "delta.dml.update") { + val deltaLog = tahoeFileIndex.deltaLog + gpuDeltaLog.withNewTransaction { txn => + DeltaLog.assertRemovable(txn.snapshot) + performUpdate(sparkSession, deltaLog, txn) + } + // Re-cache all cached plans(including this relation itself, if it's cached) that refer to + // this data source relation. + sparkSession.sharedState.cacheManager.recacheByPlan(sparkSession, target) + } + Seq(Row(metrics("numUpdatedRows").value)) + } + + private def performUpdate( + sparkSession: SparkSession, deltaLog: DeltaLog, txn: OptimisticTransaction): Unit = { + import com.databricks.sql.transaction.tahoe.implicits._ + + var numTouchedFiles: Long = 0 + var numRewrittenFiles: Long = 0 + var numAddedChangeFiles: Long = 0 + var changeFileBytes: Long = 0 + var scanTimeMs: Long = 0 + var rewriteTimeMs: Long = 0 + + val startTime = System.nanoTime() + val numFilesTotal = txn.snapshot.numOfFiles + + val updateCondition = condition.getOrElse(Literal.TrueLiteral) + val (metadataPredicates, dataPredicates) = + DeltaTableUtils.splitMetadataAndDataPredicates( + updateCondition, txn.metadata.partitionColumns, sparkSession) + val candidateFiles = txn.filterFiles(metadataPredicates ++ dataPredicates) + val nameToAddFile = generateCandidateFileMap(deltaLog.dataPath, candidateFiles) + + scanTimeMs = (System.nanoTime() - startTime) / 1000 / 1000 + + val filesToRewrite: Seq[AddFile] = if (candidateFiles.isEmpty) { + // Case 1: Do nothing if no row qualifies the partition predicates + // that are part of Update condition + Nil + } else if (dataPredicates.isEmpty) { + // Case 2: Update all the rows from the files that are in the specified partitions + // when the data filter is empty + candidateFiles + } else { + // Case 3: Find all the affected files using the user-specified condition + val fileIndex = new TahoeBatchFileIndex( + sparkSession, "update", candidateFiles, deltaLog, tahoeFileIndex.path, txn.snapshot) + // Keep everything from the resolved target except a new TahoeFileIndex + // that only involves the affected files instead of all files. 
+ val newTarget = DeltaTableUtils.replaceFileIndex(target, fileIndex) + val data = Dataset.ofRows(sparkSession, newTarget) + val updatedRowCount = metrics("numUpdatedRows") + val updatedRowUdf = DeltaUDF.boolean { + new GpuDeltaMetricUpdateUDF(updatedRowCount) + }.asNondeterministic() + val pathsToRewrite = + withStatusCode("DELTA", UpdateCommand.FINDING_TOUCHED_FILES_MSG) { + data.filter(new Column(updateCondition)) + .select(input_file_name()) + .filter(updatedRowUdf()) + .distinct() + .as[String] + .collect() + } + + scanTimeMs = (System.nanoTime() - startTime) / 1000 / 1000 + + pathsToRewrite.map(getTouchedFile(deltaLog.dataPath, _, nameToAddFile)).toSeq + } + + numTouchedFiles = filesToRewrite.length + + val newActions = if (filesToRewrite.isEmpty) { + // Do nothing if no row qualifies the UPDATE condition + Nil + } else { + // Generate the new files containing the updated values + withStatusCode("DELTA", UpdateCommand.rewritingFilesMsg(filesToRewrite.size)) { + rewriteFiles(sparkSession, txn, tahoeFileIndex.path, + filesToRewrite.map(_.path), nameToAddFile, updateCondition) + } + } + + rewriteTimeMs = (System.nanoTime() - startTime) / 1000 / 1000 - scanTimeMs + + val (changeActions, addActions) = newActions.partition(_.isInstanceOf[AddCDCFile]) + numRewrittenFiles = addActions.size + numAddedChangeFiles = changeActions.size + changeFileBytes = changeActions.collect { case f: AddCDCFile => f.size }.sum + + val totalActions = if (filesToRewrite.isEmpty) { + // Do nothing if no row qualifies the UPDATE condition + Nil + } else { + // Delete the old files and return those delete actions along with the new AddFile actions for + // files containing the updated values + val operationTimestamp = System.currentTimeMillis() + val deleteActions = filesToRewrite.map(_.removeWithTimestamp(operationTimestamp)) + + deleteActions ++ newActions + } + + if (totalActions.nonEmpty) { + metrics("numAddedFiles").set(numRewrittenFiles) + metrics("numAddedChangeFiles").set(numAddedChangeFiles) + metrics("changeFileBytes").set(changeFileBytes) + metrics("numRemovedFiles").set(numTouchedFiles) + metrics("executionTimeMs").set((System.nanoTime() - startTime) / 1000 / 1000) + metrics("scanTimeMs").set(scanTimeMs) + metrics("rewriteTimeMs").set(rewriteTimeMs) + // In the case where the numUpdatedRows is not captured, we can siphon out the metrics from + // the BasicWriteStatsTracker. This is for case 2 where the update condition contains only + // metadata predicates and so the entire partition is re-written. + val outputRows = txn.getMetric("numOutputRows").map(_.value).getOrElse(-1L) + if (metrics("numUpdatedRows").value == 0 && outputRows != 0 && + metrics("numCopiedRows").value == 0) { + // We know that numTouchedRows = numCopiedRows + numUpdatedRows. + // Since an entire partition was re-written, no rows were copied. + // So numTouchedRows == numUpdateRows + metrics("numUpdatedRows").set(metrics("numTouchedRows").value) + } else { + // This is for case 3 where the update condition contains both metadata and data predicates + // so relevant files will have some rows updated and some rows copied. We don't need to + // consider case 1 here, where no files match the update condition, as we know that + // `totalActions` is empty. 
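+        // For example (illustrative numbers): if the rewrite touched 1,000 rows and 300 of them
+        // matched the UPDATE condition, numCopiedRows is set to 1,000 - 300 = 700.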
+ metrics("numCopiedRows").set( + metrics("numTouchedRows").value - metrics("numUpdatedRows").value) + } + metrics("numDeletionVectorsAdded").set(0) + metrics("numDeletionVectorsRemoved").set(0) + metrics("numDeletionVectorsUpdated").set(0) + txn.registerSQLMetrics(sparkSession, metrics) + val tags = DMLUtils.TaggedCommitData.EMPTY + .withTag(PreservedRowTrackingTag, RowTracking.isEnabled(txn.protocol, txn.metadata)) + .withTag(NoRowsCopiedTag, metrics("numCopiedRows").value == 0) + txn.commitIfNeeded(totalActions, DeltaOperations.Update(condition), tags.stringTags) + // This is needed to make the SQL metrics visible in the Spark UI + val executionId = sparkSession.sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY) + SQLMetrics.postDriverMetricUpdates( + sparkSession.sparkContext, executionId, metrics.values.toSeq) + } + + recordDeltaEvent( + deltaLog, + "delta.dml.update.stats", + data = UpdateMetric( + condition = condition.map(_.sql).getOrElse("true"), + numFilesTotal, + numTouchedFiles, + numRewrittenFiles, + numAddedChangeFiles, + changeFileBytes, + scanTimeMs, + rewriteTimeMs, + // We don't support deletion vectors + numDeletionVectorsAdded = 0, + numDeletionVectorsRemoved = 0, + numDeletionVectorsUpdated = 0) + ) + } + + /** + * Scan all the affected files and write out the updated files. + * + * When CDF is enabled, includes the generation of CDC preimage and postimage columns for + * changed rows. + * + * @return the list of [[AddFile]]s and [[AddCDCFile]]s that have been written. + */ + private def rewriteFiles( + spark: SparkSession, + txn: OptimisticTransaction, + rootPath: Path, + inputLeafFiles: Seq[String], + nameToAddFileMap: Map[String, AddFile], + condition: Expression): Seq[FileAction] = { + // Containing the map from the relative file path to AddFile + val baseRelation = buildBaseRelation( + spark, txn, "update", rootPath, inputLeafFiles, nameToAddFileMap) + val newTarget = DeltaTableUtils.replaceFileIndex(target, baseRelation.location) + val targetDf = Dataset.ofRows(spark, newTarget) + + // Number of total rows that we have seen, i.e. are either copying or updating (sum of both). + // This will be used later, along with numUpdatedRows, to determine numCopiedRows. + val numTouchedRows = metrics("numTouchedRows") + val numTouchedRowsUdf = DeltaUDF.boolean { + new GpuDeltaMetricUpdateUDF(numTouchedRows) + }.asNondeterministic() + + val updatedDataFrame = UpdateCommand.withUpdatedColumns( + target.output, + updateExpressions, + condition, + targetDf + .filter(numTouchedRowsUdf()) + .withColumn(UpdateCommand.CONDITION_COLUMN_NAME, new Column(condition)), + UpdateCommand.shouldOutputCdc(txn)) + + txn.writeFiles(updatedDataFrame) + } +} diff --git a/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/DeltaProbe.scala b/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/DeltaProbe.scala new file mode 100644 index 00000000000..aa38d0460d1 --- /dev/null +++ b/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/DeltaProbe.scala @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.nvidia.spark.rapids.delta + +/** + * Implements the Delta Probe interface for probing the Delta Lake provider on Databricks. + * @note This is instantiated via reflection from ShimLoader. + */ +class DeltaProbeImpl extends DeltaProbe { + // Delta Lake is built-in for Databricks instances, so no probing is necessary. + override def getDeltaProvider: DeltaProvider = DeltaSpark341DBProvider +} diff --git a/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/DeltaSpark341DBProvider.scala b/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/DeltaSpark341DBProvider.scala new file mode 100644 index 00000000000..48673eaf167 --- /dev/null +++ b/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/DeltaSpark341DBProvider.scala @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.nvidia.spark.rapids.delta + +import com.databricks.sql.transaction.tahoe.rapids.GpuDeltaCatalog +import com.nvidia.spark.rapids.{AtomicCreateTableAsSelectExecMeta, AtomicReplaceTableAsSelectExecMeta, GpuExec} + +import org.apache.spark.sql.execution.datasources.v2.{AtomicCreateTableAsSelectExec, AtomicReplaceTableAsSelectExec} +import org.apache.spark.sql.execution.datasources.v2.rapids.{GpuAtomicCreateTableAsSelectExec, GpuAtomicReplaceTableAsSelectExec} + +object DeltaSpark341DBProvider extends DatabricksDeltaProviderBase { + + override def convertToGpu( + cpuExec: AtomicCreateTableAsSelectExec, + meta: AtomicCreateTableAsSelectExecMeta): GpuExec = { + GpuAtomicCreateTableAsSelectExec( + cpuExec.output, + new GpuDeltaCatalog(cpuExec.catalog, meta.conf), + cpuExec.ident, + cpuExec.partitioning, + cpuExec.query, + cpuExec.tableSpec, + cpuExec.writeOptions, + cpuExec.ifNotExists) + } + + override def convertToGpu( + cpuExec: AtomicReplaceTableAsSelectExec, + meta: AtomicReplaceTableAsSelectExecMeta): GpuExec = { + GpuAtomicReplaceTableAsSelectExec( + cpuExec.output, + new GpuDeltaCatalog(cpuExec.catalog, meta.conf), + cpuExec.ident, + cpuExec.partitioning, + cpuExec.query, + cpuExec.tableSpec, + cpuExec.writeOptions, + cpuExec.orCreate, + cpuExec.invalidateCache) + } +} diff --git a/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/GpuDeltaParquetFileFormat.scala b/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/GpuDeltaParquetFileFormat.scala new file mode 100644 index 00000000000..abaece5feb3 --- /dev/null +++ b/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/GpuDeltaParquetFileFormat.scala @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.nvidia.spark.rapids.delta + +import java.net.URI + +import com.databricks.sql.transaction.tahoe.{DeltaColumnMappingMode, DeltaParquetFileFormat, IdMapping} +import com.databricks.sql.transaction.tahoe.DeltaParquetFileFormat.{DeletionVectorDescriptorWithFilterType, IS_ROW_DELETED_COLUMN_NAME} +import com.nvidia.spark.rapids.{GpuMetric, RapidsConf, SparkPlanMeta} +import com.nvidia.spark.rapids.delta.GpuDeltaParquetFileFormatUtils.addMetadataColumnToIterator +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path + +import org.apache.spark.broadcast.Broadcast +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.execution.FileSourceScanExec +import org.apache.spark.sql.execution.datasources.PartitionedFile +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.sources.Filter +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.vectorized.ColumnarBatch + +case class GpuDeltaParquetFileFormat( + override val columnMappingMode: DeltaColumnMappingMode, + override val referenceSchema: StructType, + isSplittable: Boolean, + disablePushDown: Boolean, + broadcastDvMap: Option[Broadcast[Map[URI, DeletionVectorDescriptorWithFilterType]]] +) extends GpuDeltaParquetFileFormatBase { + + if (columnMappingMode == IdMapping) { + val requiredReadConf = SQLConf.PARQUET_FIELD_ID_READ_ENABLED + require(SparkSession.getActiveSession.exists(_.sessionState.conf.getConf(requiredReadConf)), + s"${requiredReadConf.key} must be enabled to support Delta id column mapping mode") + val requiredWriteConf = SQLConf.PARQUET_FIELD_ID_WRITE_ENABLED + require(SparkSession.getActiveSession.exists(_.sessionState.conf.getConf(requiredWriteConf)), + s"${requiredWriteConf.key} must be enabled to support Delta id column mapping mode") + } + + override def isSplitable( + sparkSession: SparkSession, + options: Map[String, String], + path: Path): Boolean = isSplittable + + override def buildReaderWithPartitionValuesAndMetrics( + sparkSession: SparkSession, + dataSchema: StructType, + partitionSchema: StructType, + requiredSchema: StructType, + filters: Seq[Filter], + options: Map[String, String], + hadoopConf: Configuration, + metrics: Map[String, GpuMetric], + alluxioPathReplacementMap: Option[Map[String, String]]) + : PartitionedFile => Iterator[InternalRow] = { + + val dataReader = super.buildReaderWithPartitionValuesAndMetrics( + sparkSession, + dataSchema, + partitionSchema, + requiredSchema, + filters, + options, + hadoopConf, + metrics, + alluxioPathReplacementMap) + + val delVecs = broadcastDvMap + val maxDelVecScatterBatchSize = RapidsConf + .DELTA_LOW_SHUFFLE_MERGE_SCATTER_DEL_VECTOR_BATCH_SIZE + .get(sparkSession.sessionState.conf) + + val delVecScatterTimeMetric = metrics(GpuMetric.DELETION_VECTOR_SCATTER_TIME) + val delVecSizeMetric = metrics(GpuMetric.DELETION_VECTOR_SIZE) + + (file: PartitionedFile) => { + val input = dataReader(file) + val dv = delVecs.flatMap(_.value.get(new URI(file.filePath.toString()))) + .map { dv => + delVecSizeMetric += dv.descriptor.inlineData.length + RoaringBitmapWrapper.deserializeFromBytes(dv.descriptor.inlineData).inner + } + addMetadataColumnToIterator(prepareSchema(requiredSchema), + dv, + input.asInstanceOf[Iterator[ColumnarBatch]], + maxDelVecScatterBatchSize, + delVecScatterTimeMetric + ).asInstanceOf[Iterator[InternalRow]] + } + } +} + +object GpuDeltaParquetFileFormat { + def tagSupportForGpuFileSourceScan(meta: 
SparkPlanMeta[FileSourceScanExec]): Unit = { + val format = meta.wrapped.relation.fileFormat.asInstanceOf[DeltaParquetFileFormat] + val requiredSchema = meta.wrapped.requiredSchema + if (requiredSchema.exists(_.name == IS_ROW_DELETED_COLUMN_NAME)) { + meta.willNotWorkOnGpu( + s"reading metadata column $IS_ROW_DELETED_COLUMN_NAME is not supported") + } + if (format.hasDeletionVectorMap) { + meta.willNotWorkOnGpu("deletion vectors are not supported") + } + } + + def convertToGpu(fmt: DeltaParquetFileFormat): GpuDeltaParquetFileFormat = { + GpuDeltaParquetFileFormat(fmt.columnMappingMode, fmt.referenceSchema, fmt.isSplittable, + fmt.disablePushDowns, fmt.broadcastDvMap) + } +} diff --git a/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/shims/DeleteCommandMetaShim.scala b/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/shims/DeleteCommandMetaShim.scala new file mode 100644 index 00000000000..6cff03517dc --- /dev/null +++ b/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/shims/DeleteCommandMetaShim.scala @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.delta.shims + +import com.databricks.sql.transaction.tahoe.commands.DeletionVectorUtils +import com.databricks.sql.transaction.tahoe.sources.DeltaSQLConf +import com.nvidia.spark.rapids.delta.{DeleteCommandEdgeMeta, DeleteCommandMeta} + +object DeleteCommandMetaShim { + def tagForGpu(meta: DeleteCommandMeta): Unit = { + val dvFeatureEnabled = DeletionVectorUtils.deletionVectorsWritable( + meta.deleteCmd.deltaLog.unsafeVolatileSnapshot) + if (dvFeatureEnabled && meta.deleteCmd.conf.getConf( + DeltaSQLConf.DELETE_USE_PERSISTENT_DELETION_VECTORS)) { + // https://github.com/NVIDIA/spark-rapids/issues/8654 + meta.willNotWorkOnGpu("Deletion vector writes are not supported on GPU") + } + } + + def tagForGpu(meta: DeleteCommandEdgeMeta): Unit = { + val dvFeatureEnabled = DeletionVectorUtils.deletionVectorsWritable( + meta.deleteCmd.deltaLog.unsafeVolatileSnapshot) + if (dvFeatureEnabled && meta.deleteCmd.conf.getConf( + DeltaSQLConf.DELETE_USE_PERSISTENT_DELETION_VECTORS)) { + // https://github.com/NVIDIA/spark-rapids/issues/8654 + meta.willNotWorkOnGpu("Deletion vector writes are not supported on GPU") + } + } +} diff --git a/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/shims/DeltaLogShim.scala b/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/shims/DeltaLogShim.scala new file mode 100644 index 00000000000..6b35e7c3e45 --- /dev/null +++ b/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/shims/DeltaLogShim.scala @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.delta.shims + +import com.databricks.sql.transaction.tahoe.DeltaLog +import com.databricks.sql.transaction.tahoe.actions.Metadata + +import org.apache.spark.sql.execution.datasources.FileFormat + +object DeltaLogShim { + def fileFormat(deltaLog: DeltaLog): FileFormat = { + deltaLog.fileFormat(deltaLog.unsafeVolatileSnapshot.protocol, + deltaLog.unsafeVolatileSnapshot.metadata) + } + def getMetadata(deltaLog: DeltaLog): Metadata = { + deltaLog.unsafeVolatileSnapshot.metadata + } +} diff --git a/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/shims/InvariantViolationExceptionShim.scala b/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/shims/InvariantViolationExceptionShim.scala new file mode 100644 index 00000000000..31881d2e375 --- /dev/null +++ b/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/shims/InvariantViolationExceptionShim.scala @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.nvidia.spark.rapids.delta.shims + +import com.databricks.sql.transaction.tahoe.constraints.Constraints._ +import com.databricks.sql.transaction.tahoe.schema.DeltaInvariantViolationException +import com.databricks.sql.transaction.tahoe.schema.InvariantViolationException + +object InvariantViolationExceptionShim { + def apply(c: Check, m: Map[String, Any]): InvariantViolationException = { + DeltaInvariantViolationException(c, m) + } + + def apply(c: NotNull): InvariantViolationException = { + DeltaInvariantViolationException(c) + } +} diff --git a/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/shims/MergeIntoCommandMetaShim.scala b/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/shims/MergeIntoCommandMetaShim.scala new file mode 100644 index 00000000000..ebe801e66e9 --- /dev/null +++ b/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/shims/MergeIntoCommandMetaShim.scala @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids.delta.shims + +import com.databricks.sql.transaction.tahoe.commands.{MergeIntoCommand, MergeIntoCommandEdge} +import com.databricks.sql.transaction.tahoe.rapids.{GpuDeltaLog, GpuLowShuffleMergeCommand, GpuMergeIntoCommand} +import com.nvidia.spark.rapids.{RapidsConf, RapidsReaderType} +import com.nvidia.spark.rapids.delta.{MergeIntoCommandEdgeMeta, MergeIntoCommandMeta} + +import org.apache.spark.internal.Logging +import org.apache.spark.sql.execution.command.RunnableCommand + +object MergeIntoCommandMetaShim extends Logging { + def tagForGpu(meta: MergeIntoCommandMeta, mergeCmd: MergeIntoCommand): Unit = { + // see https://github.com/NVIDIA/spark-rapids/issues/8415 for more information + if (mergeCmd.notMatchedBySourceClauses.nonEmpty) { + meta.willNotWorkOnGpu("notMatchedBySourceClauses not supported on GPU") + } + } + + def tagForGpu(meta: MergeIntoCommandEdgeMeta, mergeCmd: MergeIntoCommandEdge): Unit = { + // see https://github.com/NVIDIA/spark-rapids/issues/8415 for more information + if (mergeCmd.notMatchedBySourceClauses.nonEmpty) { + meta.willNotWorkOnGpu("notMatchedBySourceClauses not supported on GPU") + } + } + + def convertToGpu(mergeCmd: MergeIntoCommand, conf: RapidsConf): RunnableCommand = { + // TODO: Currently we only support low shuffler merge only when parquet per file read is enabled + // due to the limitation of implementing row index metadata column. + if (conf.isDeltaLowShuffleMergeEnabled) { + if (conf.isParquetPerFileReadEnabled) { + GpuLowShuffleMergeCommand( + mergeCmd.source, + mergeCmd.target, + new GpuDeltaLog(mergeCmd.targetFileIndex.deltaLog, conf), + mergeCmd.condition, + mergeCmd.matchedClauses, + mergeCmd.notMatchedClauses, + mergeCmd.notMatchedBySourceClauses, + mergeCmd.migratedSchema)(conf) + } else { + logWarning(s"""Low shuffle merge disabled since ${RapidsConf.PARQUET_READER_TYPE} is + not set to ${RapidsReaderType.PERFILE}. Falling back to classic merge.""") + GpuMergeIntoCommand( + mergeCmd.source, + mergeCmd.target, + new GpuDeltaLog(mergeCmd.targetFileIndex.deltaLog, conf), + mergeCmd.condition, + mergeCmd.matchedClauses, + mergeCmd.notMatchedClauses, + mergeCmd.notMatchedBySourceClauses, + mergeCmd.migratedSchema)(conf) + } + } else { + GpuMergeIntoCommand( + mergeCmd.source, + mergeCmd.target, + new GpuDeltaLog(mergeCmd.targetFileIndex.deltaLog, conf), + mergeCmd.condition, + mergeCmd.matchedClauses, + mergeCmd.notMatchedClauses, + mergeCmd.notMatchedBySourceClauses, + mergeCmd.migratedSchema)(conf) + } + } + + def convertToGpu(mergeCmd: MergeIntoCommandEdge, conf: RapidsConf): RunnableCommand = { + // TODO: Currently we only support low shuffler merge only when parquet per file read is enabled + // due to the limitation of implementing row index metadata column. + if (conf.isDeltaLowShuffleMergeEnabled) { + if (conf.isParquetPerFileReadEnabled) { + GpuLowShuffleMergeCommand( + mergeCmd.source, + mergeCmd.target, + new GpuDeltaLog(mergeCmd.targetFileIndex.deltaLog, conf), + mergeCmd.condition, + mergeCmd.matchedClauses, + mergeCmd.notMatchedClauses, + mergeCmd.notMatchedBySourceClauses, + mergeCmd.migratedSchema)(conf) + } else { + logWarning(s"""Low shuffle merge is still disable since ${RapidsConf.PARQUET_READER_TYPE} is + not set to ${RapidsReaderType.PERFILE}. 
Falling back to classic merge.""") + GpuMergeIntoCommand( + mergeCmd.source, + mergeCmd.target, + new GpuDeltaLog(mergeCmd.targetFileIndex.deltaLog, conf), + mergeCmd.condition, + mergeCmd.matchedClauses, + mergeCmd.notMatchedClauses, + mergeCmd.notMatchedBySourceClauses, + mergeCmd.migratedSchema)(conf) + } + } else { + GpuMergeIntoCommand( + mergeCmd.source, + mergeCmd.target, + new GpuDeltaLog(mergeCmd.targetFileIndex.deltaLog, conf), + mergeCmd.condition, + mergeCmd.matchedClauses, + mergeCmd.notMatchedClauses, + mergeCmd.notMatchedBySourceClauses, + mergeCmd.migratedSchema)(conf) + } + } +} diff --git a/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/shims/MetadataShims.scala b/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/shims/MetadataShims.scala new file mode 100644 index 00000000000..e717df94d89 --- /dev/null +++ b/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/shims/MetadataShims.scala @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.nvidia.spark.rapids.delta.shims + +import com.databricks.sql.transaction.tahoe.stats.DeltaStatistics + +trait ShimUsesMetadataFields { + val NUM_RECORDS = DeltaStatistics.NUM_RECORDS + val MIN = DeltaStatistics.MIN + val MAX = DeltaStatistics.MAX + val NULL_COUNT = DeltaStatistics.NULL_COUNT +} diff --git a/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/shims/ShimDeltaUDF.scala b/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/shims/ShimDeltaUDF.scala new file mode 100644 index 00000000000..52a76ab907a --- /dev/null +++ b/delta-lake/delta-spark350db143/src/main/scala/com/nvidia/spark/rapids/delta/shims/ShimDeltaUDF.scala @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.nvidia.spark.rapids.delta.shims + +import com.databricks.sql.transaction.tahoe.DeltaUDF + +import org.apache.spark.sql.expressions.UserDefinedFunction + +object ShimDeltaUDF { + def stringStringUdf(f: String => String): UserDefinedFunction = DeltaUDF.stringFromString(f) +} diff --git a/delta-lake/delta-stub/pom.xml b/delta-lake/delta-stub/pom.xml index 5e855365fa9..deb2bb7233d 100644 --- a/delta-lake/delta-stub/pom.xml +++ b/delta-lake/delta-stub/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.12 - 24.10.1 + 24.12.0 ../../jdk-profiles/pom.xml rapids-4-spark-delta-stub_2.12 RAPIDS Accelerator for Apache Spark Delta Lake Stub Delta Lake stub for the RAPIDS Accelerator for Apache Spark - 24.10.1 + 24.12.0 ../delta-lake/delta-stub diff --git a/dist/pom.xml b/dist/pom.xml index fb2024262ca..f7d69b761f2 100644 --- a/dist/pom.xml +++ b/dist/pom.xml @@ -22,13 +22,13 @@ com.nvidia rapids-4-spark-jdk-profiles_2.12 - 24.10.1 + 24.12.0 ../jdk-profiles/pom.xml rapids-4-spark_2.12 RAPIDS Accelerator for Apache Spark Distribution Creates the distribution package of the RAPIDS plugin for Apache Spark - 24.10.1 + 24.12.0 com.nvidia diff --git a/docs/additional-functionality/advanced_configs.md b/docs/additional-functionality/advanced_configs.md index 8f030b1aa29..07346a5b850 100644 --- a/docs/additional-functionality/advanced_configs.md +++ b/docs/additional-functionality/advanced_configs.md @@ -95,8 +95,8 @@ Name | Description | Default Value | Applicable at spark.rapids.sql.format.hive.text.write.enabled|When set to false disables Hive text table write acceleration|false|Runtime spark.rapids.sql.format.iceberg.enabled|When set to false disables all Iceberg acceleration|true|Runtime spark.rapids.sql.format.iceberg.read.enabled|When set to false disables Iceberg input acceleration|true|Runtime -spark.rapids.sql.format.json.enabled|When set to true enables all json input and output acceleration. (only input is currently supported anyways)|false|Runtime -spark.rapids.sql.format.json.read.enabled|When set to true enables json input acceleration|false|Runtime +spark.rapids.sql.format.json.enabled|When set to true enables all json input and output acceleration. (only input is currently supported anyways)|true|Runtime +spark.rapids.sql.format.json.read.enabled|When set to true enables json input acceleration|true|Runtime spark.rapids.sql.format.orc.enabled|When set to false disables all orc input and output acceleration|true|Runtime spark.rapids.sql.format.orc.floatTypesToString.enable|When reading an ORC file, the source data schemas(schemas of ORC file) may differ from the target schemas (schemas of the reader), we need to handle the castings from source type to target type. Since float/double numbers in GPU have different precision with CPU, when casting float/double to string, the result of GPU is different from result of CPU spark. Its default value is `true` (this means the strings result will differ from result of CPU). If it's set `false` explicitly and there exists casting from float/double to string in the job, then such behavior will cause an exception, and the job will fail.|true|Runtime spark.rapids.sql.format.orc.multiThreadedRead.maxNumFilesParallel|A limit on the maximum number of files per task processed in parallel on the CPU side before the file is sent to the GPU. This affects the amount of host memory used when reading the files in parallel. 
Used with MULTITHREADED reader, see spark.rapids.sql.format.orc.reader.type.|2147483647|Runtime @@ -129,6 +129,7 @@ Name | Description | Default Value | Applicable at spark.rapids.sql.join.leftOuter.enabled|When set to true left outer joins are enabled on the GPU|true|Runtime spark.rapids.sql.join.leftSemi.enabled|When set to true left semi joins are enabled on the GPU|true|Runtime spark.rapids.sql.join.rightOuter.enabled|When set to true right outer joins are enabled on the GPU|true|Runtime +spark.rapids.sql.json.read.datetime.enabled|JSON reading is not 100% compatible when reading dates and timestamps.|false|Runtime spark.rapids.sql.json.read.decimal.enabled|When reading a quoted string as a decimal Spark supports reading non-ascii unicode digits, and the RAPIDS Accelerator does not.|true|Runtime spark.rapids.sql.json.read.double.enabled|JSON reading is not 100% compatible when reading doubles.|true|Runtime spark.rapids.sql.json.read.float.enabled|JSON reading is not 100% compatible when reading floats.|true|Runtime @@ -277,7 +278,7 @@ Name | SQL Function(s) | Description | Default Value | Notes spark.rapids.sql.expression.IsNaN|`isnan`|Checks if a value is NaN|true|None| spark.rapids.sql.expression.IsNotNull|`isnotnull`|Checks if a value is not null|true|None| spark.rapids.sql.expression.IsNull|`isnull`|Checks if a value is null|true|None| -spark.rapids.sql.expression.JsonToStructs|`from_json`|Returns a struct value with the given `jsonStr` and `schema`|false|This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case| +spark.rapids.sql.expression.JsonToStructs|`from_json`|Returns a struct value with the given `jsonStr` and `schema`|true|None| spark.rapids.sql.expression.JsonTuple|`json_tuple`|Returns a tuple like the function get_json_object, but it takes multiple names. All the input parameters and output column types are string.|false|This is disabled by default because Experimental feature that could be unstable or have performance issues.| spark.rapids.sql.expression.KnownFloatingPointNormalized| |Tag to prevent redundant normalization|true|None| spark.rapids.sql.expression.KnownNotNull| |Tag an expression as known to not be null|true|None| @@ -310,6 +311,7 @@ Name | SQL Function(s) | Description | Default Value | Notes spark.rapids.sql.expression.Minute|`minute`|Returns the minute component of the string/timestamp|true|None| spark.rapids.sql.expression.MonotonicallyIncreasingID|`monotonically_increasing_id`|Returns monotonically increasing 64-bit integers|true|None| spark.rapids.sql.expression.Month|`month`|Returns the month from a date or timestamp|true|None| +spark.rapids.sql.expression.MonthsBetween|`months_between`|If `timestamp1` is later than `timestamp2`, then the result is positive. If `timestamp1` and `timestamp2` are on the same day of month, or both are the last day of month, time of day will be ignored. 
Otherwise, the difference is calculated based on 31 days per month, and rounded to 8 digits unless roundOff=false.|true|None| spark.rapids.sql.expression.Multiply|`*`|Multiplication|true|None| spark.rapids.sql.expression.Murmur3Hash|`hash`|Murmur3 hash operator|true|None| spark.rapids.sql.expression.NaNvl|`nanvl`|Evaluates to `left` iff left is not NaN, `right` otherwise|true|None| diff --git a/docs/archives/CHANGELOG_24.02-to-24.06.md b/docs/archives/CHANGELOG_24.02-to-24.08.md similarity index 74% rename from docs/archives/CHANGELOG_24.02-to-24.06.md rename to docs/archives/CHANGELOG_24.02-to-24.08.md index d95307a1efe..0366869935d 100644 --- a/docs/archives/CHANGELOG_24.02-to-24.06.md +++ b/docs/archives/CHANGELOG_24.02-to-24.08.md @@ -1,5 +1,212 @@ # Change log -Generated on 2024-10-09 +Generated on 2024-12-08 +## Release 24.08 + +### Features +||| +|:---|:---| +|[#9259](https://github.com/NVIDIA/spark-rapids/issues/9259)|[FEA] Create Spark 4.0.0 shim and build env| +|[#10366](https://github.com/NVIDIA/spark-rapids/issues/10366)|[FEA] It would be nice if we could support Hive-style write bucketing table| +|[#10987](https://github.com/NVIDIA/spark-rapids/issues/10987)|[FEA] Implement lore framework to support all operators.| +|[#11087](https://github.com/NVIDIA/spark-rapids/issues/11087)|[FEA] Support regex pattern with brackets when rewrite to PrefixRange patten in rlike| +|[#22](https://github.com/NVIDIA/spark-rapids/issues/22)|[FEA] Add support for bucketed writes| +|[#9939](https://github.com/NVIDIA/spark-rapids/issues/9939)|[FEA] `GpuInsertIntoHiveTable` supports parquet format| + +### Performance +||| +|:---|:---| +|[#8750](https://github.com/NVIDIA/spark-rapids/issues/8750)|[FEA] Rework GpuSubstringIndex to use cudf::slice_strings| +|[#7404](https://github.com/NVIDIA/spark-rapids/issues/7404)|[FEA] explore a hash agg passthrough on partial aggregates| +|[#10976](https://github.com/NVIDIA/spark-rapids/issues/10976)|Rewrite `pattern1|pattern2|pattern3` to multiple contains in `rlike`| + +### Bugs Fixed +||| +|:---|:---| +|[#11287](https://github.com/NVIDIA/spark-rapids/issues/11287)|[BUG] String split APIs on empty string produce incorrect result| +|[#11270](https://github.com/NVIDIA/spark-rapids/issues/11270)|[BUG] test_regexp_replace[DATAGEN_SEED=1722297411, TZ=UTC] hanging there forever in pre-merge CI intermittently| +|[#9682](https://github.com/NVIDIA/spark-rapids/issues/9682)|[BUG] Casting FLOAT64 to DECIMAL(12,7) produces different rows from Apache Spark CPU| +|[#10809](https://github.com/NVIDIA/spark-rapids/issues/10809)|[BUG] cast(9.95 as decimal(3,1)), actual: 9.9, expected: 10.0| +|[#11266](https://github.com/NVIDIA/spark-rapids/issues/11266)|[BUG] test_broadcast_hash_join_constant_keys failed in databricks runtimes| +|[#11243](https://github.com/NVIDIA/spark-rapids/issues/11243)|[BUG] ArrayIndexOutOfBoundsException on a left outer join| +|[#11030](https://github.com/NVIDIA/spark-rapids/issues/11030)|Fix tests failures in string_test.py| +|[#11245](https://github.com/NVIDIA/spark-rapids/issues/11245)|[BUG] mvn verify for the source-javadoc fails and no pre-merge check catches it| +|[#11223](https://github.com/NVIDIA/spark-rapids/issues/11223)|[BUG] Remove unreferenced `CUDF_VER=xxx` in the CI script| +|[#11114](https://github.com/NVIDIA/spark-rapids/issues/11114)|[BUG] Update nightly tests for Scala 2.13 to use JDK 17 only| +|[#11229](https://github.com/NVIDIA/spark-rapids/issues/11229)|[BUG] test_delta_name_column_mapping_no_field_ids fails on Spark | 
+|[#11031](https://github.com/NVIDIA/spark-rapids/issues/11031)|Fix tests failures in multiple files | +|[#10948](https://github.com/NVIDIA/spark-rapids/issues/10948)|Figure out why `MapFromArrays ` appears in the tests for hive parquet write| +|[#11018](https://github.com/NVIDIA/spark-rapids/issues/11018)|Fix tests failures in hash_aggregate_test.py| +|[#11173](https://github.com/NVIDIA/spark-rapids/issues/11173)|[BUG] The `rs. serialization time` metric is misleading| +|[#11017](https://github.com/NVIDIA/spark-rapids/issues/11017)|Fix tests failures in url_test.py| +|[#11201](https://github.com/NVIDIA/spark-rapids/issues/11201)|[BUG] Delta Lake tables with name mapping can throw exceptions on read| +|[#11175](https://github.com/NVIDIA/spark-rapids/issues/11175)|[BUG] Clean up unused and duplicated 'org/roaringbitmap' folder in the spark3xx shims| +|[#11196](https://github.com/NVIDIA/spark-rapids/issues/11196)|[BUG] pipeline failed due to class not found exception: NoClassDefFoundError: com/nvidia/spark/rapids/GpuScalar| +|[#11189](https://github.com/NVIDIA/spark-rapids/issues/11189)|[BUG] regression in NDS after PR #11170| +|[#11167](https://github.com/NVIDIA/spark-rapids/issues/11167)|[BUG] UnsupportedOperationException during delta write with `optimize()`| +|[#11172](https://github.com/NVIDIA/spark-rapids/issues/11172)|[BUG] `get_json_object` returns wrong output with wildcard path| +|[#11148](https://github.com/NVIDIA/spark-rapids/issues/11148)|[BUG] Integration test `test_write_hive_bucketed_table` fails| +|[#11155](https://github.com/NVIDIA/spark-rapids/issues/11155)|[BUG] ArrayIndexOutOfBoundsException in BatchWithPartitionData.splitColumnarBatch| +|[#11152](https://github.com/NVIDIA/spark-rapids/issues/11152)|[BUG] LORE dumping consumes too much memory.| +|[#11029](https://github.com/NVIDIA/spark-rapids/issues/11029)|Fix tests failures in subquery_test.py| +|[#11150](https://github.com/NVIDIA/spark-rapids/issues/11150)|[BUG] hive_parquet_write_test.py::test_insert_hive_bucketed_table failure| +|[#11070](https://github.com/NVIDIA/spark-rapids/issues/11070)|[BUG] numpy2 fail fastparquet cases: numpy.dtype size changed| +|[#11136](https://github.com/NVIDIA/spark-rapids/issues/11136)|UnaryPositive expression doesn't extend UnaryExpression| +|[#11122](https://github.com/NVIDIA/spark-rapids/issues/11122)|[BUG] UT MetricRange failed 651070526 was not less than 1.5E8 in spark313| +|[#11119](https://github.com/NVIDIA/spark-rapids/issues/11119)|[BUG] window_function_test.py::test_window_group_limits_fallback_for_row_number fails in a distributed environment| +|[#11023](https://github.com/NVIDIA/spark-rapids/issues/11023)|Fix tests failures in dpp_test.py| +|[#11026](https://github.com/NVIDIA/spark-rapids/issues/11026)|Fix tests failures in map_test.py| +|[#11020](https://github.com/NVIDIA/spark-rapids/issues/11020)|Fix tests failures in grouping_sets_test.py| +|[#11113](https://github.com/NVIDIA/spark-rapids/issues/11113)|[BUG] Update premerge tests for Scala 2.13 to use JDK 17 only| +|[#11027](https://github.com/NVIDIA/spark-rapids/issues/11027)|Fix tests failures in sort_test.py| +|[#10775](https://github.com/NVIDIA/spark-rapids/issues/10775)|[BUG] Issues found by Spark UT Framework on RapidsStringExpressionsSuite| +|[#11033](https://github.com/NVIDIA/spark-rapids/issues/11033)|[BUG] CICD failed a case: cmp_test.py::test_empty_filter[>]| +|[#11103](https://github.com/NVIDIA/spark-rapids/issues/11103)|[BUG] UCX Shuffle With scala.MatchError | 
+|[#11007](https://github.com/NVIDIA/spark-rapids/issues/11007)|Fix tests failures in array_test.py| +|[#10801](https://github.com/NVIDIA/spark-rapids/issues/10801)|[BUG] JDK17 nightly build after Spark UT Framework is merged| +|[#11019](https://github.com/NVIDIA/spark-rapids/issues/11019)|Fix tests failures in window_function_test.py| +|[#11063](https://github.com/NVIDIA/spark-rapids/issues/11063)|[BUG] op time for GpuCoalesceBatches is more than actual| +|[#11006](https://github.com/NVIDIA/spark-rapids/issues/11006)|Fix test failures in arithmetic_ops_test.py| +|[#10995](https://github.com/NVIDIA/spark-rapids/issues/10995)|Fallback TimeZoneAwareExpression that only support UTC with zoneId instead of timeZone config| +|[#8652](https://github.com/NVIDIA/spark-rapids/issues/8652)|[BUG] array_item test failures on Spark 3.3.x| +|[#11053](https://github.com/NVIDIA/spark-rapids/issues/11053)|[BUG] Build on Databricks 330 fails| +|[#10925](https://github.com/NVIDIA/spark-rapids/issues/10925)| Concat cannot accept no parameter| +|[#10975](https://github.com/NVIDIA/spark-rapids/issues/10975)|[BUG] regex `^.*literal` cannot be rewritten as `contains(literal)` for multiline strings| +|[#10956](https://github.com/NVIDIA/spark-rapids/issues/10956)|[BUG] hive_parquet_write_test.py: test_write_compressed_parquet_into_hive_table integration test failures| +|[#10772](https://github.com/NVIDIA/spark-rapids/issues/10772)|[BUG] Issues found by Spark UT Framework on RapidsDataFrameAggregateSuite| +|[#10986](https://github.com/NVIDIA/spark-rapids/issues/10986)|[BUG]Cast from string to float using hand-picked values failed in CastOpSuite| +|[#10972](https://github.com/NVIDIA/spark-rapids/issues/10972)|Spark 4.0 compile errors | +|[#10794](https://github.com/NVIDIA/spark-rapids/issues/10794)|[BUG] Incorrect cast of string columns containing various infinity notations with trailing spaces | +|[#10964](https://github.com/NVIDIA/spark-rapids/issues/10964)|[BUG] Improve stability of pre-merge jenkinsfile| +|[#10714](https://github.com/NVIDIA/spark-rapids/issues/10714)|Signature changed for `PythonUDFRunner.writeUDFs` | +|[#10712](https://github.com/NVIDIA/spark-rapids/issues/10712)|[AUDIT] BatchScanExec/DataSourceV2Relation to group splits by join keys if they differ from partition keys| +|[#10673](https://github.com/NVIDIA/spark-rapids/issues/10673)|[AUDIT] Rename plan nodes for PythonMapInArrowExec| +|[#10710](https://github.com/NVIDIA/spark-rapids/issues/10710)|[AUDIT] `uncacheTableOrView` changed in CommandUtils | +|[#10711](https://github.com/NVIDIA/spark-rapids/issues/10711)|[AUDIT] Match DataSourceV2ScanExecBase changes to groupPartitions method | +|[#10669](https://github.com/NVIDIA/spark-rapids/issues/10669)|Supporting broadcast of multiple filtering keys in DynamicPruning | + +### PRs +||| +|:---|:---| +|[#11400](https://github.com/NVIDIA/spark-rapids/pull/11400)|[DOC] update notes in download page for the decompressing gzip issue [skip ci]| +|[#11355](https://github.com/NVIDIA/spark-rapids/pull/11355)|Update changelog for the v24.08 release [skip ci]| +|[#11353](https://github.com/NVIDIA/spark-rapids/pull/11353)|Update download doc for v24.08.1 [skip ci]| +|[#11352](https://github.com/NVIDIA/spark-rapids/pull/11352)|Update version to 24.08.1-SNAPSHOT [skip ci]| +|[#11337](https://github.com/NVIDIA/spark-rapids/pull/11337)|Update changelog for the v24.08 release [skip ci]| +|[#11335](https://github.com/NVIDIA/spark-rapids/pull/11335)|Fix Delta Lake truncation of min/max string values| 
+|[#11304](https://github.com/NVIDIA/spark-rapids/pull/11304)|Update changelog for v24.08.0 release [skip ci]| +|[#11303](https://github.com/NVIDIA/spark-rapids/pull/11303)|Update rapids JNI and private dependency to 24.08.0| +|[#11296](https://github.com/NVIDIA/spark-rapids/pull/11296)|[DOC] update doc for 2408 release [skip CI]| +|[#11309](https://github.com/NVIDIA/spark-rapids/pull/11309)|[Doc ]Update lore doc about the range [skip ci]| +|[#11292](https://github.com/NVIDIA/spark-rapids/pull/11292)|Add work around for string split with empty input.| +|[#11278](https://github.com/NVIDIA/spark-rapids/pull/11278)|Fix formatting of advanced configs doc| +|[#10917](https://github.com/NVIDIA/spark-rapids/pull/10917)|Adopt changes from JNI for casting from float to decimal| +|[#11269](https://github.com/NVIDIA/spark-rapids/pull/11269)|Revert "upgrade ucx to 1.17.0"| +|[#11260](https://github.com/NVIDIA/spark-rapids/pull/11260)|Mitigate intermittent test_buckets and shuffle_smoke_test OOM issue| +|[#11268](https://github.com/NVIDIA/spark-rapids/pull/11268)|Fix degenerate conditional nested loop join detection| +|[#11244](https://github.com/NVIDIA/spark-rapids/pull/11244)|Fix ArrayIndexOutOfBoundsException on join counts with constant join keys| +|[#11259](https://github.com/NVIDIA/spark-rapids/pull/11259)|CI Docker to support integration tests with Rocky OS + jdk17 [skip ci]| +|[#11247](https://github.com/NVIDIA/spark-rapids/pull/11247)|Fix `string_test.py` errors on Spark 4.0| +|[#11246](https://github.com/NVIDIA/spark-rapids/pull/11246)|Rework Maven Source Plugin Skip| +|[#11149](https://github.com/NVIDIA/spark-rapids/pull/11149)|Rework on substring index| +|[#11236](https://github.com/NVIDIA/spark-rapids/pull/11236)|Remove the unused vars from the version-def CI script| +|[#11237](https://github.com/NVIDIA/spark-rapids/pull/11237)|Fork jvm for maven-source-plugin| +|[#11200](https://github.com/NVIDIA/spark-rapids/pull/11200)|Multi-get_json_object| +|[#11230](https://github.com/NVIDIA/spark-rapids/pull/11230)|Skip test where Delta Lake may not be fully compatible with Spark| +|[#11220](https://github.com/NVIDIA/spark-rapids/pull/11220)|Avoid failing spark bug SPARK-44242 while generate run_dir| +|[#11226](https://github.com/NVIDIA/spark-rapids/pull/11226)|Fix auto merge conflict 11212| +|[#11129](https://github.com/NVIDIA/spark-rapids/pull/11129)|Spark 4: Fix miscellaneous tests including logic, repart, hive_delimited.| +|[#11163](https://github.com/NVIDIA/spark-rapids/pull/11163)|Support `MapFromArrays` on GPU| +|[#11219](https://github.com/NVIDIA/spark-rapids/pull/11219)|Fix hash_aggregate_test.py to run with ANSI enabled| +|[#11186](https://github.com/NVIDIA/spark-rapids/pull/11186)|from_json Json to Struct Exception Logging| +|[#11180](https://github.com/NVIDIA/spark-rapids/pull/11180)|More accurate estimation for the result serialization time in RapidsShuffleThreadedWriterBase| +|[#11194](https://github.com/NVIDIA/spark-rapids/pull/11194)|Fix ANSI mode test failures in url_test.py| +|[#11202](https://github.com/NVIDIA/spark-rapids/pull/11202)|Fix read from Delta Lake table with name column mapping and missing Parquet IDs| +|[#11185](https://github.com/NVIDIA/spark-rapids/pull/11185)|Fix multi-release jar problem| +|[#11144](https://github.com/NVIDIA/spark-rapids/pull/11144)|Build the Scala2.13 dist jar with JDK17| +|[#11197](https://github.com/NVIDIA/spark-rapids/pull/11197)|Fix class not found error: com/nvidia/spark/rapids/GpuScalar| 
+|[#11191](https://github.com/NVIDIA/spark-rapids/pull/11191)|Fix dynamic pruning regression in GpuFileSourceScanExec| +|[#10994](https://github.com/NVIDIA/spark-rapids/pull/10994)|Add Spark 4.0.0 Build Profile and Other Supporting Changes| +|[#11192](https://github.com/NVIDIA/spark-rapids/pull/11192)|Append new authorized user to blossom-ci whitelist [skip ci]| +|[#11179](https://github.com/NVIDIA/spark-rapids/pull/11179)|Allow more expressions to be tiered| +|[#11141](https://github.com/NVIDIA/spark-rapids/pull/11141)|Enable some Rapids config in RapidsSQLTestsBaseTrait for Spark UT| +|[#11170](https://github.com/NVIDIA/spark-rapids/pull/11170)|Avoid listFiles or inputFiles on relations with static partitioning| +|[#11159](https://github.com/NVIDIA/spark-rapids/pull/11159)|Drop spark31x shims| +|[#10951](https://github.com/NVIDIA/spark-rapids/pull/10951)|Case when performance improvement: reduce the `copy_if_else`| +|[#11165](https://github.com/NVIDIA/spark-rapids/pull/11165)|Fix some GpuBroadcastToRowExec by not dropping columns| +|[#11126](https://github.com/NVIDIA/spark-rapids/pull/11126)|Coalesce batches after a logical coalesce operation| +|[#11164](https://github.com/NVIDIA/spark-rapids/pull/11164)|fix the bucketed write error for non-utc cases| +|[#11132](https://github.com/NVIDIA/spark-rapids/pull/11132)|Add deletion vector metrics for low shuffle merge.| +|[#11156](https://github.com/NVIDIA/spark-rapids/pull/11156)|Fix batch splitting for partition column size on row-count-only batches| +|[#11153](https://github.com/NVIDIA/spark-rapids/pull/11153)|Fix LORE dump oom.| +|[#11102](https://github.com/NVIDIA/spark-rapids/pull/11102)|Fix ANSI mode failures in subquery_test.py| +|[#11151](https://github.com/NVIDIA/spark-rapids/pull/11151)|Fix the test error of the bucketed write for the non-utc case| +|[#11147](https://github.com/NVIDIA/spark-rapids/pull/11147)|upgrade ucx to 1.17.0| +|[#11138](https://github.com/NVIDIA/spark-rapids/pull/11138)|Update fastparquet to 2024.5.0 for numpy2 compatibility| +|[#11137](https://github.com/NVIDIA/spark-rapids/pull/11137)|Handle the change for UnaryPositive now extending RuntimeReplaceable| +|[#11094](https://github.com/NVIDIA/spark-rapids/pull/11094)|Add `HiveHash` support on GPU| +|[#11139](https://github.com/NVIDIA/spark-rapids/pull/11139)|Improve MetricsSuite to allow more gc jitter| +|[#11133](https://github.com/NVIDIA/spark-rapids/pull/11133)|Fix `test_window_group_limits_fallback`| +|[#11097](https://github.com/NVIDIA/spark-rapids/pull/11097)|Fix miscellaneous integ tests for Spark 4| +|[#11118](https://github.com/NVIDIA/spark-rapids/pull/11118)|Fix issue with DPP and AQE on reused broadcast exchanges| +|[#11043](https://github.com/NVIDIA/spark-rapids/pull/11043)|Dataproc serverless test fixes| +|[#10965](https://github.com/NVIDIA/spark-rapids/pull/10965)|Profiler: Disable collecting async allocation events by default| +|[#11117](https://github.com/NVIDIA/spark-rapids/pull/11117)|Update Scala2.13 premerge CI against JDK17| +|[#11084](https://github.com/NVIDIA/spark-rapids/pull/11084)|Introduce LORE framework.| +|[#11099](https://github.com/NVIDIA/spark-rapids/pull/11099)|Spark 4: Handle ANSI mode in sort_test.py| +|[#11115](https://github.com/NVIDIA/spark-rapids/pull/11115)|Fix match error in RapidsShuffleIterator.scala [scala2.13]| +|[#11088](https://github.com/NVIDIA/spark-rapids/pull/11088)|Support regex patterns with brackets when rewriting to PrefixRange pattern in rlike.| 
+|[#10950](https://github.com/NVIDIA/spark-rapids/pull/10950)|Add a heuristic to skip second or third agg pass| +|[#11048](https://github.com/NVIDIA/spark-rapids/pull/11048)|Fixed array_tests for Spark 4.0.0| +|[#11049](https://github.com/NVIDIA/spark-rapids/pull/11049)|Fix some cast_tests for Spark 4.0.0| +|[#11066](https://github.com/NVIDIA/spark-rapids/pull/11066)|Replaced spark3xx-common references to spark-shared| +|[#11083](https://github.com/NVIDIA/spark-rapids/pull/11083)|Exclude a case based on JDK version in Spark UT| +|[#10997](https://github.com/NVIDIA/spark-rapids/pull/10997)|Fix some test issues in Spark UT and keep RapidsTestSettings update-to-date| +|[#11073](https://github.com/NVIDIA/spark-rapids/pull/11073)|Disable ANSI mode for window function tests| +|[#11076](https://github.com/NVIDIA/spark-rapids/pull/11076)|Improve the diagnostics for 'conv' fallback explain| +|[#11092](https://github.com/NVIDIA/spark-rapids/pull/11092)|Add GpuBucketingUtils shim to Spark 4.0.0| +|[#11062](https://github.com/NVIDIA/spark-rapids/pull/11062)|fix duplicate counted metrics like op time for GpuCoalesceBatches| +|[#11044](https://github.com/NVIDIA/spark-rapids/pull/11044)|Fixed Failing tests in arithmetic_ops_tests for Spark 4.0.0| +|[#11086](https://github.com/NVIDIA/spark-rapids/pull/11086)|upgrade blossom-ci actions version [skip ci]| +|[#10957](https://github.com/NVIDIA/spark-rapids/pull/10957)|Support bucketing write for GPU| +|[#10979](https://github.com/NVIDIA/spark-rapids/pull/10979)|[FEA] Introduce low shuffle merge.| +|[#10996](https://github.com/NVIDIA/spark-rapids/pull/10996)|Fallback non-UTC TimeZoneAwareExpression with zoneId| +|[#11072](https://github.com/NVIDIA/spark-rapids/pull/11072)|Workaround numpy2 failed fastparquet compatibility tests| +|[#11046](https://github.com/NVIDIA/spark-rapids/pull/11046)|Calculate parallelism to speed up pre-merge CI| +|[#11054](https://github.com/NVIDIA/spark-rapids/pull/11054)|fix flaky array_item test failures| +|[#11051](https://github.com/NVIDIA/spark-rapids/pull/11051)|[FEA] Increase parallelism of deltalake test on databricks| +|[#10993](https://github.com/NVIDIA/spark-rapids/pull/10993)|`binary-dedupe` changes for Spark 4.0.0| +|[#11060](https://github.com/NVIDIA/spark-rapids/pull/11060)|Add in the ability to fingerprint JSON columns| +|[#11059](https://github.com/NVIDIA/spark-rapids/pull/11059)|Revert "Add in the ability to fingerprint JSON columns (#11002)" [skip ci]| +|[#11039](https://github.com/NVIDIA/spark-rapids/pull/11039)|Concat() Exception bug fix| +|[#11002](https://github.com/NVIDIA/spark-rapids/pull/11002)|Add in the ability to fingerprint JSON columns| +|[#10977](https://github.com/NVIDIA/spark-rapids/pull/10977)|Rewrite multiple literal choice regex to multiple contains in rlike| +|[#11035](https://github.com/NVIDIA/spark-rapids/pull/11035)|Fix auto merge conflict 11034 [skip ci]| +|[#11040](https://github.com/NVIDIA/spark-rapids/pull/11040)|Append new authorized user to blossom-ci whitelist [skip ci]| +|[#11036](https://github.com/NVIDIA/spark-rapids/pull/11036)|Update blossom-ci ACL to secure format [skip ci]| +|[#11032](https://github.com/NVIDIA/spark-rapids/pull/11032)|Fix a hive write test failure for Spark 350| +|[#10998](https://github.com/NVIDIA/spark-rapids/pull/10998)|Improve log to print more lines in build [skip ci]| +|[#10992](https://github.com/NVIDIA/spark-rapids/pull/10992)|Addressing the Named Parameter change in Spark 4.0.0| +|[#10943](https://github.com/NVIDIA/spark-rapids/pull/10943)|Fix Spark UT 
issues in RapidsDataFrameAggregateSuite| +|[#10963](https://github.com/NVIDIA/spark-rapids/pull/10963)|Add rapids configs to enable GPU running in Spark UT| +|[#10978](https://github.com/NVIDIA/spark-rapids/pull/10978)|More compilation fixes for Spark 4.0.0| +|[#10953](https://github.com/NVIDIA/spark-rapids/pull/10953)|Speed up the integration tests by running them in parallel on the Databricks cluster| +|[#10958](https://github.com/NVIDIA/spark-rapids/pull/10958)|Fix a hive write test failure| +|[#10970](https://github.com/NVIDIA/spark-rapids/pull/10970)|Move Support for `RaiseError` to a Shim Excluding Spark 4.0.0| +|[#10966](https://github.com/NVIDIA/spark-rapids/pull/10966)|Add default value for REF of premerge jenkinsfile to avoid bad overwritten [skip ci]| +|[#10959](https://github.com/NVIDIA/spark-rapids/pull/10959)|Add new ID to blossom-ci allow list [skip ci]| +|[#10952](https://github.com/NVIDIA/spark-rapids/pull/10952)|Add shims to take care of the signature change for writeUDFs in PythonUDFRunner| +|[#10931](https://github.com/NVIDIA/spark-rapids/pull/10931)|Add Support for Renaming of PythonMapInArrow| +|[#10949](https://github.com/NVIDIA/spark-rapids/pull/10949)|Change dependency version to 24.08.0-SNAPSHOT| +|[#10857](https://github.com/NVIDIA/spark-rapids/pull/10857)|[Spark 4.0] Account for `PartitionedFileUtil.splitFiles` signature change.| +|[#10912](https://github.com/NVIDIA/spark-rapids/pull/10912)|GpuInsertIntoHiveTable supports parquet format| +|[#10863](https://github.com/NVIDIA/spark-rapids/pull/10863)|[Spark 4.0] Account for `CommandUtils.uncacheTableOrView` signature change.| +|[#10944](https://github.com/NVIDIA/spark-rapids/pull/10944)|Added Shim for BatchScanExec to Support Spark 4.0| +|[#10946](https://github.com/NVIDIA/spark-rapids/pull/10946)|Unarchive Spark test jar for spark.read(ability)| +|[#10945](https://github.com/NVIDIA/spark-rapids/pull/10945)|Add Support for Multiple Filtering Keys for Subquery Broadcast| +|[#10871](https://github.com/NVIDIA/spark-rapids/pull/10871)|Add classloader diagnostics to initShuffleManager error message| +|[#10933](https://github.com/NVIDIA/spark-rapids/pull/10933)|Fixed Databricks build| +|[#10929](https://github.com/NVIDIA/spark-rapids/pull/10929)|Append new authorized user to blossom-ci whitelist [skip ci]| + ## Release 24.06 ### Features diff --git a/docs/compatibility.md b/docs/compatibility.md index 71a3927df13..0c745069032 100644 --- a/docs/compatibility.md +++ b/docs/compatibility.md @@ -316,118 +316,102 @@ case. ## JSON -The JSON format read is an experimental feature which is expected to have some issues, so we disable -it by default. If you would like to test it, you need to enable `spark.rapids.sql.format.json.enabled` and -`spark.rapids.sql.format.json.read.enabled`. +JSON, despite being a standard format, has some ambiguity in it. Spark also offers the ability to allow +some invalid JSON to be parsed. We have tried to provide JSON parsing that is compatible with +what Apache Spark does support. Note that Spark itself has changed through different releases, and we will +try to call out which releases we offer different results for. JSON parsing is enabled by default +except for date and timestamp types where we still have work to complete. If you wish to disable +JSON Scan you can set `spark.rapids.sql.format.json.enabled` or +`spark.rapids.sql.format.json.read.enabled` to false. To disable `from_json` you can set +`spark.rapids.sql.expression.JsonToStructs` to false. 
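As a quick illustration of the toggles above, here is a minimal sketch of setting them at runtime from a Scala Spark session; the session variable `spark` is an assumption, and the same keys can just as well be passed with `--conf` at submit time.

```scala
// Minimal sketch (not part of this patch): turning the GPU JSON paths off at runtime.
// Assumes an active SparkSession named `spark` with the RAPIDS Accelerator installed.
spark.conf.set("spark.rapids.sql.format.json.enabled", "false")      // disables JSON scan acceleration
spark.conf.set("spark.rapids.sql.format.json.read.enabled", "false") // disables JSON read acceleration
spark.conf.set("spark.rapids.sql.expression.JsonToStructs", "false") // disables from_json on the GPU
```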
-### Invalid JSON
+### Limits
-In Apache Spark on the CPU if a line in the JSON file is invalid the entire row is considered
-invalid and will result in nulls being returned for all columns. It is considered invalid if it
-violates the JSON specification, but with a few extensions.
+In versions of Spark before 3.5.0 there is no maximum to how deeply nested JSON can be. After
+3.5.0 this was updated to be 1,000 by default. The current GPU implementation of JSON Scan and
+`from_json` limits this to 254 no matter what version of Spark is used. If the nesting level is
+over this, the JSON is considered invalid and all values will be returned as nulls.
+`get_json_object` and `json_tuple` have a maximum nesting depth of 64. An exception is thrown if
+the nesting depth goes over the maximum.
- * Single quotes are allowed to quote strings and keys
- * Unquoted values like NaN and Infinity can be parsed as floating point values
- * Control characters do not need to be replaced with the corresponding escape sequences in a
-   quoted string.
- * Garbage at the end of a row, if there is valid JSON at the beginning of the row, is ignored.
+Spark 3.5.0 and above impose a maximum string length of 20,000,000 and a maximum number length of
+1,000. We do not have any of these limits on the GPU.
-The GPU implementation does the same kinds of validations, but many of them are done on a per-column
-basis, which, for example, means if a number is formatted incorrectly, it is likely only that value
-will be considered invalid and return a null instead of nulls for the entire row.
+We, like Spark, cannot support a JSON string that is larger than 2 GiB in size.
-There are options that can be used to enable and disable many of these features which are mostly
-listed below.
+### JSON Validation
-### JSON options
+Spark supports the option `allowNonNumericNumbers`. Versions of Spark prior to 3.3.0 were inconsistent between
+quoted and non-quoted values ([SPARK-38060](https://issues.apache.org/jira/browse/SPARK-38060)). The
+GPU implementation is consistent with 3.3.0 and above.
-Spark supports passing options to the JSON parser when reading a dataset. In most cases if the RAPIDS Accelerator
-sees one of these options that it does not support it will fall back to the CPU. In some cases we do not. The
-following options are documented below.
+### JSON Floating Point Types
-- `allowNumericLeadingZeros` - Allows leading zeros in numbers (e.g. 00012). By default this is set to false.
-  When it is false Spark considers the JSON invalid if it encounters this type of number. The RAPIDS
-  Accelerator supports validating columns that are returned to the user with this option on or off.
+Parsing floating-point values has the same limitations as [casting from string to float](#string-to-float).
-- `allowUnquotedControlChars` - Allows JSON Strings to contain unquoted control characters (ASCII characters with
-  value less than 32, including tab and line feed characters) or not. By default this is set to false. If the schema
-  is provided while reading JSON file, then this flag has no impact on the RAPIDS Accelerator as it always allows
-  unquoted control characters but Spark sees these are invalid are returns nulls. However, if the schema is not provided
-  and this option is false, then RAPIDS Accelerator's behavior is same as Spark where an exception is thrown
-  as discussed in `JSON Schema discovery` section.
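To make the validation and floating point notes above concrete, the following is a minimal, hypothetical sketch of reading doubles from JSON with `allowNonNumericNumbers` enabled; the input path and the `spark` session are assumptions, not part of this change.

```scala
// Minimal sketch: reading a double column from JSON with allowNonNumericNumbers on.
// With this option enabled, unquoted NaN and Infinity parse as floating point values,
// consistent with the Spark 3.3.0+ behavior described above.
import org.apache.spark.sql.types.{DoubleType, StructField, StructType}

val schema = StructType(Seq(StructField("value", DoubleType)))
val df = spark.read
  .schema(schema)
  .option("allowNonNumericNumbers", "true")
  .json("/tmp/example.jsonl") // hypothetical input path
```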
+### JSON Integral Types -- `allowNonNumericNumbers` - Allows `NaN` and `Infinity` values to be parsed (note that these are not valid numeric - values in the [JSON specification](https://json.org)). Spark versions prior to 3.3.0 have inconsistent behavior and will - parse some variants of `NaN` and `Infinity` even when this option is disabled - ([SPARK-38060](https://issues.apache.org/jira/browse/SPARK-38060)). The RAPIDS Accelerator behavior is consistent with - Spark version 3.3.0 and later. +Versions of Spark prior to 3.3.0 would parse quoted integer values, like "1". But 3.3.0 and above consider +these to be invalid and will return `null` when parsed as an integral type. The GPU implementation +follows 3.3.0 and above. -### Nesting -In versions of Spark before 3.5.0 there is no maximum to how deeply nested JSON can be. After -3.5.0 this was updated to be 1000 by default. The current GPU implementation limits this to 254 -no matter what version of Spark is used. If the nesting level is over this the JSON is considered -invalid and all values will be returned as nulls. +### JSON Decimal Types -Mixed types can have some problems. If an item being read could have some lines that are arrays -and others that are structs/dictionaries it is possible an error will be thrown. +Spark supports parsing decimal types formatted either as floating point numbers or as integral numbers, even if they are +in a quoted string. If the value is in a quoted string, the locale of the JVM is used to determine the number format. +If the locale is not `US`, which is the default, we will fall back to the CPU because we do not currently +parse those numbers correctly. The `US` format removes all commas ',' from the quoted string. +As a part of this, Spark also supports non-Arabic numerals, but we do not support parsing them; +see [issue 10532](https://github.com/NVIDIA/spark-rapids/issues/10532). -Dates and Timestamps have some issues and may return values for technically invalid inputs. +### JSON Date/Timestamp Types -Floating point numbers have issues generally like with the rest of Spark, and we can parse them into -a valid floating point number, but it might not match 100% with the way Spark does it. +Dates and timestamps are not supported by default in the JSON parser, since the GPU implementation is not 100% +compatible with Apache Spark. +If needed, they can be turned on through the config `spark.rapids.sql.json.read.datetime.enabled`. +This config works for both JSON scan and `from_json`. Once enabled, the JSON parser still does +not support the `TimestampNTZ` type and will fall back to the CPU if `spark.sql.timestampType` is set +to `TIMESTAMP_NTZ` or if an explicit schema is provided that contains the `TimestampNTZ` type. -Strings are supported, but the data returned might not be normalized in the same way as the CPU -implementation. Generally this comes down to the GPU not modifying the input, whereas Spark will -do things like remove extra white space and parse numbers before turning them back into a string. +There is currently no support for reading numeric values as timestamps and null values are returned instead +([#4940](https://github.com/NVIDIA/spark-rapids/issues/4940)). A workaround would be to read as longs and then cast to timestamp. -### JSON Floating Point +### JSON Arrays and Structs with Overflowing Numbers -Parsing floating-point values has the same limitations as [casting from string to float](#string-to-float).
+Spark is inconsistent between versions in how it handles overflowing numbers that are nested in either an array +or a non-top-level struct. In some versions only the value that overflowed is marked as null. In other versions the +wrapping array or struct is marked as null. We currently only mark the individual value as null. This matches +versions 3.4.2 and above of Spark for structs. On most versions of Spark a single value that overflows within an array +invalidates the entire array. -Prior to Spark 3.3.0, reading JSON strings such as `"+Infinity"` when specifying that the data type is `FloatType` -or `DoubleType` caused these values to be parsed even when `allowNonNumericNumbers` is set to false. Also, Spark -versions prior to 3.3.0 only supported the `"Infinity"` and `"-Infinity"` representations of infinity and did not -support `"+INF"`, `"-INF"`, or `"+Infinity"`, which Spark considers valid when unquoted. The GPU JSON reader is -consistent with the behavior in Spark 3.3.0 and later. +### Duplicate Struct Names -Another limitation of the GPU JSON reader is that it will parse strings containing non-string boolean or numeric values where -Spark will treat them as invalid inputs and will just return `null`. +The JSON specification technically allows for duplicate keys in a struct, but does not explain what to +do with them. In Spark, which value wins is inconsistent between operators, and for `get_json_object` it +depends on the query being performed. We do not always match what Spark does. We do match it in many cases, +but we consider this enough of a corner case that we have not tried to make it work in all cases. -### JSON Timestamps/Dates +We also do not support schemas where there are duplicate column names. We just fall back to the CPU for those cases. -The JSON parser does not support the `TimestampNTZ` type and will fall back to CPU if `spark.sql.timestampType` is -set to `TIMESTAMP_NTZ` or if an explicit schema is provided that contains the `TimestampNTZ` type. - -There is currently no support for reading numeric values as timestamps and null values are returned instead -([#4940](https://github.com/NVIDIA/spark-rapids/issues/4940)). A workaround would be to read as longs and then cast -to timestamp. +### JSON Normalization (String Types) -### JSON Schema discovery +In versions of Spark prior to 4.0.0 input JSON strings were parsed to JSON tokens and then converted back to +strings. This effectively normalizes the output string. So things like single quotes are transformed into double +quotes, floating point numbers are parsed and converted back to strings possibly changing the format, and +escaped characters are converted back to their simplest form. We try to support this on the GPU as well. Single quotes +will be converted to double quotes. Only `get_json_object` and `json_tuple` attempt to normalize floating point +numbers. There is no implementation on the GPU right now that tries to normalize escape characters. -Spark SQL can automatically infer the schema of a JSON dataset if schema is not provided explicitly. The CPU -handles schema discovery and there is no GPU acceleration of this. By default Spark will read/parse the entire -dataset to determine the schema. This means that some options/errors which are ignored by the GPU may still -result in an exception if used with schema discovery. +### `from_json` Function -### `from_json` function -`JsonToStructs` of `from_json` is based on the same code as reading a JSON lines file.
There are +`JsonToStructs` or `from_json` is based on the same code as reading a JSON lines file. There are a few differences with it. -The `from_json` function is disabled by default because it is experimental and has some known -incompatibilities with Spark, and can be enabled by setting -`spark.rapids.sql.expression.JsonToStructs=true`. You don't need to set -`spark.rapids.sql.format.json.enabled` and`spark.rapids.sql.format.json.read.enabled` to true. - -There is no schema discovery as a schema is required as input to `from_json` - -In addition to `structs`, a top level `map` type is supported, but only if the key and value are -strings. - -### `to_json` function +The main difference is that `from_json` supports parsing Maps and Arrays directly from a JSON column, whereas +JSON Scan only supports parsing top level structs. The GPU implementation of `from_json` has support for parsing +a `MAP` as a top level schema, but does not currently support arrays at the top level. -The `to_json` function is disabled by default because it is experimental and has some known incompatibilities -with Spark, and can be enabled by setting `spark.rapids.sql.expression.StructsToJson=true`. +### `to_json` Function Known issues are: @@ -435,7 +419,7 @@ Known issues are: produce `-4.1243574E26` but the GPU may produce `-4.124357351E26`. - Not all JSON options are respected -### get_json_object +### `get_json_object` Function Known issue: - [Floating-point number normalization error](https://github.com/NVIDIA/spark-rapids-jni/issues/1922). `get_json_object` floating-point number normalization on the GPU could sometimes return incorrect results if the string contains high-precision values, see the String to Float and Float to String section for more details. @@ -477,17 +461,16 @@ These are the known edge cases where running on the GPU will produce different r next to a newline or a repetition that produces zero or more results ([#5610](https://github.com/NVIDIA/spark-rapids/pull/5610))` - Word and non-word boundaries, `\b` and `\B` -- Line anchor `$` will incorrectly match any of the unicode characters `\u0085`, `\u2028`, or `\u2029` followed by - another line-terminator, such as `\n`. For example, the pattern `TEST$` will match `TEST\u0085\n` on the GPU but - not on the CPU ([#7585](https://github.com/NVIDIA/spark-rapids/issues/7585)). The following regular expression patterns are not yet supported on the GPU and will fall back to the CPU. - Line anchors `^` and `$` are not supported in some contexts, such as when combined with a choice (`^|a` or `$|a`). - String anchor `\Z` is not supported by `regexp_replace`, and in some rare contexts. -- String anchor `\z` is not supported -- Patterns containing an end of line or string anchor immediately next to a newline or repetition that produces zero +- String anchor `\z` is not supported. +- Patterns containing an end-of-line or string anchor immediately next to a newline or repetition that produces zero or more results +- Patterns containing end-of-line anchors like `$` or `\Z` immediately followed by + escape sequences (e.g., `\w`, `\b`) are not supported. 
- Line anchor `$` and string anchors `\Z` are not supported in patterns containing `\W` or `\D` - Line and string anchors are not supported by `string_split` and `str_to_map` - Lazy quantifiers within a choice block such as `(2|\u2029??)+` @@ -652,16 +635,19 @@ guaranteed to produce the same results as the CPU: - `yyyy/MM/dd` - `yyyy-MM-dd` - `yyyyMMdd` +- `yyyymmdd` - `yyyy/MM/dd HH:mm:ss` - `yyyy-MM-dd HH:mm:ss` +- `yyyyMMdd HH:mm:ss` LEGACY timeParserPolicy support has the following limitations when running on the GPU: - Only 4 digit years are supported - The proleptic Gregorian calendar is used instead of the hybrid Julian+Gregorian calendar that Spark uses in legacy mode -- When format is `yyyyMMdd`, GPU only supports 8 digit strings. Spark supports like 7 digit - `2024101` string while GPU does not support. Only tested `UTC` and `Asia/Shanghai` timezones. +- When the format is or contains `yyyyMMdd` or `yyyymmdd`, the GPU only supports 8 digit strings for these formats. + Spark also supports 7 digit strings such as `2024101`, while the GPU does not. Only tested with the `UTC` and + `Asia/Shanghai` timezones. ## Formatting dates and timestamps as strings diff --git a/docs/configs.md index 4ef4d8efb3c..75076bafe7c 100644 --- a/docs/configs.md +++ b/docs/configs.md @@ -10,7 +10,7 @@ The following is the list of options that `rapids-plugin-4-spark` supports. On startup use: `--conf [conf key]=[conf value]`. For example: ``` -${SPARK_HOME}/bin/spark-shell --jars rapids-4-spark_2.12-24.10.1-cuda11.jar \ +${SPARK_HOME}/bin/spark-shell --jars rapids-4-spark_2.12-24.12.0-cuda11.jar \ --conf spark.plugins=com.nvidia.spark.SQLPlugin \ --conf spark.rapids.sql.concurrentGpuTasks=2 ``` @@ -37,7 +37,7 @@ Name | Description | Default Value | Applicable at spark.rapids.memory.gpu.minAllocFraction|The fraction of total GPU memory that limits the minimum size of the RMM pool. The value must be less than or equal to the setting for spark.rapids.memory.gpu.allocFraction.|0.25|Startup spark.rapids.memory.host.spillStorageSize|Amount of off-heap host memory to use for buffering spilled GPU data before spilling to local disk. Use -1 to set the amount to the combined size of pinned and pageable memory pools.|-1|Startup spark.rapids.memory.pinnedPool.size|The size of the pinned memory pool in bytes unless otherwise specified. Use 0 to disable the pool.|0|Startup -spark.rapids.sql.batchSizeBytes|Set the target number of bytes for a GPU batch. Splits sizes for input data is covered by separate configs. The maximum setting is 2 GB to avoid exceeding the cudf row count limit of a column.|1073741824|Runtime +spark.rapids.sql.batchSizeBytes|Set the target number of bytes for a GPU batch. Splits sizes for input data is covered by separate configs.|1073741824|Runtime spark.rapids.sql.concurrentGpuTasks|Set the number of tasks that can execute concurrently per GPU. Tasks may temporarily block when the number of concurrent tasks in the executor exceeds this amount. Allowing too many concurrent tasks on the same GPU may lead to GPU out of memory errors.|2|Runtime spark.rapids.sql.enabled|Enable (true) or disable (false) sql operations on the GPU|true|Runtime spark.rapids.sql.explain|Explain why some parts of a query were not placed on a GPU or not.
Possible values are ALL: print everything, NONE: print nothing, NOT_ON_GPU: print only parts of a query that did not go on the GPU|NOT_ON_GPU|Runtime @@ -45,7 +45,6 @@ Name | Description | Default Value | Applicable at spark.rapids.sql.multiThreadedRead.numThreads|The maximum number of threads on each executor to use for reading small files in parallel. This can not be changed at runtime after the executor has started. Used with COALESCING and MULTITHREADED readers, see spark.rapids.sql.format.parquet.reader.type, spark.rapids.sql.format.orc.reader.type, or spark.rapids.sql.format.avro.reader.type for a discussion of reader types. If it is not set explicitly and spark.executor.cores is set, it will be tried to assign value of `max(MULTITHREAD_READ_NUM_THREADS_DEFAULT, spark.executor.cores)`, where MULTITHREAD_READ_NUM_THREADS_DEFAULT = 20.|20|Startup spark.rapids.sql.reader.batchSizeBytes|Soft limit on the maximum number of bytes the reader reads per batch. The readers will read chunks of data until this limit is met or exceeded. Note that the reader may estimate the number of bytes that will be used on the GPU in some cases based on the schema and number of rows in each batch.|2147483647|Runtime spark.rapids.sql.reader.batchSizeRows|Soft limit on the maximum number of rows the reader will read per batch. The orc and parquet readers will read row groups until this limit is met or exceeded. The limit is respected by the csv reader.|2147483647|Runtime -spark.rapids.sql.shuffle.spillThreads|Number of threads used to spill shuffle data to disk in the background.|6|Runtime spark.rapids.sql.udfCompiler.enabled|When set to true, Scala UDFs will be considered for compilation as Catalyst expressions|false|Runtime For more advanced configs, please refer to the [RAPIDS Accelerator for Apache Spark Advanced Configuration](./additional-functionality/advanced_configs.md) page. diff --git a/docs/dev/shimplify.md b/docs/dev/shimplify.md index a8f075016ae..cd9100ff447 100644 --- a/docs/dev/shimplify.md +++ b/docs/dev/shimplify.md @@ -65,7 +65,15 @@ validations: * The file is stored under the *owner shim* directory. * All files participating listing the `buildver` of the current Maven build session are symlinked to -`target/${buildver}/generated/src/(main|test)/(scala|java)`. Thus, instead of hardcoding distinct +`target/${buildver}/generated/src/(main|test)/(scala|java)` +except for template classes requiring spark.version.classifier in the package name. + +* If the package name of a class such as RapidsShuffleManager contains `$_spark.version.classifier_` +(because it is source-identical across shims up to the package name) it will be materialized in the +`target/${buildver}/generated/src/(main|test)/(scala|java)` with `spark.version.classifier` +interpolated into the package name. + +Thus, instead of hardcoding distinct lists of directories for `build-helper` Maven plugin to add (one for each shim) after the full transition to shimplify, the pom will have only 4 add source statements that is independent of the number of supported shims. 
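To picture the interpolation step described above, here is a rough, hypothetical Python sketch of the naming scheme; it is only an illustration of the idea, not the actual shimplify build tooling.

```python
# Hypothetical helper mirroring the substitution described above: a template
# package containing the spark.version.classifier placeholder is materialized
# once per buildver with the classifier spliced into the package name.
def materialize_package(template_package: str, classifier: str) -> str:
    return template_package.replace("$_spark.version.classifier_", classifier)

# For example, a RapidsShuffleManager-style template materialized for the
# spark320 shim (the package prefix shown here is illustrative):
print(materialize_package("com.nvidia.spark.rapids.$_spark.version.classifier_",
                          "spark320"))
# -> com.nvidia.spark.rapids.spark320
```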
diff --git a/docs/dev/shims.md b/docs/dev/shims.md index 281915f18c5..0d62eb4cae8 100644 --- a/docs/dev/shims.md +++ b/docs/dev/shims.md @@ -68,17 +68,17 @@ Using JarURLConnection URLs we create a Parallel World of the current version wi Spark 3.0.2's URLs: ```text -jar:file:/home/spark/rapids-4-spark_2.12-24.10.1.jar!/ -jar:file:/home/spark/rapids-4-spark_2.12-24.10.1.jar!/spark-shared/ -jar:file:/home/spark/rapids-4-spark_2.12-24.10.1.jar!/spark302/ +jar:file:/home/spark/rapids-4-spark_2.12-24.12.0.jar!/ +jar:file:/home/spark/rapids-4-spark_2.12-24.12.0.jar!/spark-shared/ +jar:file:/home/spark/rapids-4-spark_2.12-24.12.0.jar!/spark302/ ``` Spark 3.2.0's URLs : ```text -jar:file:/home/spark/rapids-4-spark_2.12-24.10.1.jar!/ -jar:file:/home/spark/rapids-4-spark_2.12-24.10.1.jar!/spark-shared/ -jar:file:/home/spark/rapids-4-spark_2.12-24.10.1.jar!/spark320/ +jar:file:/home/spark/rapids-4-spark_2.12-24.12.0.jar!/ +jar:file:/home/spark/rapids-4-spark_2.12-24.12.0.jar!/spark-shared/ +jar:file:/home/spark/rapids-4-spark_2.12-24.12.0.jar!/spark320/ ``` ### Late Inheritance in Public Classes diff --git a/docs/dev/testing.md b/docs/dev/testing.md index af4d97d1699..9f1c33091f1 100644 --- a/docs/dev/testing.md +++ b/docs/dev/testing.md @@ -5,5 +5,5 @@ nav_order: 2 parent: Developer Overview --- An overview of testing can be found within the repository at: -* [Unit tests](https://github.com/NVIDIA/spark-rapids/tree/branch-24.10/tests#readme) -* [Integration testing](https://github.com/NVIDIA/spark-rapids/tree/branch-24.10/integration_tests#readme) +* [Unit tests](https://github.com/NVIDIA/spark-rapids/tree/branch-24.12/tests#readme) +* [Integration testing](https://github.com/NVIDIA/spark-rapids/tree/branch-24.12/integration_tests#readme) diff --git a/docs/download.md b/docs/download.md index 572d342a07b..60c62071f8b 100644 --- a/docs/download.md +++ b/docs/download.md @@ -27,7 +27,8 @@ The plugin is tested on the following architectures: ### Software Requirements: - OS: Ubuntu 20.04, Ubuntu 22.04, CentOS 7, or Rocky Linux 8 + OS: Spark RAPIDS is compatible with any Linux distribution with glibc >= 2.28 (Please check ldd --version output). glibc 2.28 was released August 1, 2018. + Tested on Ubuntu 20.04, Ubuntu 22.04, Rocky Linux 8 and Rocky Linux 9 NVIDIA Driver*: R470+ diff --git a/docs/supported_ops.md b/docs/supported_ops.md index 8f6a9ca0e5f..acf7133af40 100644 --- a/docs/supported_ops.md +++ b/docs/supported_ops.md @@ -9279,7 +9279,7 @@ are limited. JsonToStructs `from_json` Returns a struct value with the given `jsonStr` and `schema` -This is disabled by default because it is currently in beta and undergoes continuous enhancements. Please consult the [compatibility documentation](../compatibility.md#json-supporting-types) to determine whether you can enable this configuration for your use case +None project jsonStr @@ -9320,7 +9320,7 @@ are limited. NS -PS
MAP only supports keys and values that are of STRING type;
UTC is only supported TZ for child TIMESTAMP;
unsupported child types NULL, BINARY, CALENDAR, MAP, UDT, DAYTIME, YEARMONTH
+PS
MAP only supports keys and values that are of STRING type and is only supported at the top level;
UTC is only supported TZ for child TIMESTAMP;
unsupported child types NULL, BINARY, CALENDAR, MAP, UDT, DAYTIME, YEARMONTH
PS
UTC is only supported TZ for child TIMESTAMP;
unsupported child types NULL, BINARY, CALENDAR, MAP, UDT, DAYTIME, YEARMONTH
@@ -11493,6 +11493,103 @@ are limited. +MonthsBetween +`months_between` +If `timestamp1` is later than `timestamp2`, then the result is positive. If `timestamp1` and `timestamp2` are on the same day of month, or both are the last day of month, time of day will be ignored. Otherwise, the difference is calculated based on 31 days per month, and rounded to 8 digits unless roundOff=false. +None +project +timestamp1 + + + + + + + + +PS
UTC is only supported TZ for TIMESTAMP
+ + + + + + + + + + + + + +timestamp2 + + + + + + + + +PS
UTC is only supported TZ for TIMESTAMP
+ + + + + + + + + + + + + +round +PS
Literal value only
+ + + + + + + + + + + + + + + + + + + + + +result + + + + + + +S + + + + + + + + + + + + + + + Multiply `*` Multiplication @@ -11637,6 +11734,34 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT +DAYTIME +YEARMONTH + + Murmur3Hash `hash` Murmur3 hash operator @@ -11762,34 +11887,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT -DAYTIME -YEARMONTH - - NamedLambdaVariable A parameter to a higher order SQL function @@ -12041,6 +12138,34 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT +DAYTIME +YEARMONTH + + Or `or` Logical OR @@ -12185,34 +12310,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT -DAYTIME -YEARMONTH - - ParseUrl `parse_url` Extracts a part from a URL @@ -12435,6 +12532,34 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT +DAYTIME +YEARMONTH + + PosExplode `posexplode_outer`, `posexplode` Given an input array produces a sequence of rows for each value in the array @@ -12630,34 +12755,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT -DAYTIME -YEARMONTH - - PreciseTimestampConversion Expression used internally to convert the TimestampType to Long and back without losing precision, i.e. in microseconds. Used in time windowing @@ -12952,6 +13049,34 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT +DAYTIME +YEARMONTH + + Quarter `quarter` Returns the quarter of the year for date, in the range 1 to 4 @@ -13077,34 +13202,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT -DAYTIME -YEARMONTH - - RaiseError `raise_error` Throw an exception @@ -13355,6 +13452,34 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT +DAYTIME +YEARMONTH + + RegExpExtractAll `regexp_extract_all` Extract all strings matching a regular expression corresponding to the regex group index @@ -13572,34 +13697,6 @@ are limited. 
-Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT -DAYTIME -YEARMONTH - - Remainder `%`, `mod` Remainder or modulo @@ -13776,6 +13873,34 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT +DAYTIME +YEARMONTH + + Rint `rint` Rounds up a double value to the nearest double equal to an integer @@ -13976,34 +14101,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT -DAYTIME -YEARMONTH - - ScalaUDF User Defined Function, the UDF can choose to implement a RAPIDS accelerated interface to get better performance. @@ -14254,6 +14351,34 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT +DAYTIME +YEARMONTH + + ShiftLeft `shiftleft` Bitwise shift left (<<) @@ -14402,34 +14527,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT -DAYTIME -YEARMONTH - - ShiftRightUnsigned `shiftrightunsigned` Bitwise unsigned shift right (>>>) @@ -14653,33 +14750,61 @@ are limited. -Sinh -`sinh` -Hyperbolic sine -None -project -input - - - - - - -S - - - - - - - - - - - - - - +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT +DAYTIME +YEARMONTH + + +Sinh +`sinh` +Hyperbolic sine +None +project +input + + + + + + +S + + + + + + + + + + + + + + result @@ -14802,34 +14927,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT -DAYTIME -YEARMONTH - - SortArray `sort_array` Returns a sorted array with the input array and the ascending / descending order @@ -15057,6 +15154,34 @@ are limited. NS +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT +DAYTIME +YEARMONTH + + Sqrt `sqrt` Square root @@ -15229,34 +15354,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT -DAYTIME -YEARMONTH - - StartsWith Starts with @@ -15502,6 +15599,34 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT +DAYTIME +YEARMONTH + + StringLocate `locate`, `position` Substring search operator @@ -15696,34 +15821,6 @@ are limited. 
-Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT -DAYTIME -YEARMONTH - - StringRepeat `repeat` StringRepeat operator that repeats the given strings with numbers of times given by repeatTimes @@ -15895,6 +15992,34 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT +DAYTIME +YEARMONTH + + StringSplit `split` Splits `str` around occurrences that match `regex` @@ -16089,34 +16214,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT -DAYTIME -YEARMONTH - - StringTranslate `translate` StringTranslate operator @@ -16288,6 +16385,34 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT +DAYTIME +YEARMONTH + + StringTrimLeft `ltrim` StringTrimLeft operator @@ -16487,34 +16612,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT -DAYTIME -YEARMONTH - - Substring `substr`, `substring` Substring operator @@ -16709,6 +16806,34 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT +DAYTIME +YEARMONTH + + Subtract `-` Subtraction @@ -16951,34 +17076,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT -DAYTIME -YEARMONTH - - Tanh `tanh` Hyperbolic tangent @@ -17151,6 +17248,34 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT +DAYTIME +YEARMONTH + + ToDegrees `degrees` Converts radians to degrees @@ -17393,40 +17518,12 @@ are limited. - - - - - - - - -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT -DAYTIME -YEARMONTH + + + + + + TransformKeys @@ -17577,6 +17674,34 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT +DAYTIME +YEARMONTH + + UnaryMinus `negative` Negate a numeric value @@ -17801,34 +17926,6 @@ are limited. 
-Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT -DAYTIME -YEARMONTH - - UnboundedPreceding$ Special boundary for a window frame, indicating all rows preceding the current row @@ -17982,6 +18079,34 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT +DAYTIME +YEARMONTH + + Upper `ucase`, `upper` String uppercase operator @@ -18232,34 +18357,6 @@ are limited. NS -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT -DAYTIME -YEARMONTH - - XxHash64 `xxhash64` xxhash64 hash operator @@ -18576,6 +18673,34 @@ are limited. S +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT +DAYTIME +YEARMONTH + + ApproximatePercentile `approx_percentile`, `percentile_approx` Approximate percentile @@ -18766,34 +18891,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT -DAYTIME -YEARMONTH - - Average `avg`, `mean` Average aggregate operator @@ -19084,6 +19181,34 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT +DAYTIME +YEARMONTH + + CollectSet `collect_set` Collect a set of unique elements, not supported in reduction @@ -19229,34 +19354,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT -DAYTIME -YEARMONTH - - Count `count` Count aggregate operator @@ -19547,6 +19644,34 @@ are limited. NS +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT +DAYTIME +YEARMONTH + + Last `last_value`, `last` last aggregate operator @@ -19692,34 +19817,6 @@ are limited. NS -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT -DAYTIME -YEARMONTH - - Max `max` Max aggregate operator @@ -20009,6 +20106,34 @@ are limited. NS +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT +DAYTIME +YEARMONTH + + Min `min` Min aggregate operator @@ -20154,34 +20279,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT -DAYTIME -YEARMONTH - - MinBy `min_by` MinBy aggregate operator. 
It may produce different results than CPU when multiple rows in a group have same minimum value in the ordering column and different associated values in the value column. @@ -20516,6 +20613,34 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT +DAYTIME +YEARMONTH + + PivotFirst PivotFirst operator @@ -20660,34 +20785,6 @@ are limited. NS -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT -DAYTIME -YEARMONTH - - StddevPop `stddev_pop` Aggregation computing population standard deviation @@ -20978,6 +21075,34 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT +DAYTIME +YEARMONTH + + Sum `sum` Sum aggregate operator @@ -21123,34 +21248,6 @@ are limited. -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT -DAYTIME -YEARMONTH - - VariancePop `var_pop` Aggregation computing population variance @@ -21441,6 +21538,34 @@ are limited. +Expression +SQL Functions(s) +Description +Notes +Context +Param/Output +BOOLEAN +BYTE +SHORT +INT +LONG +FLOAT +DOUBLE +DATE +TIMESTAMP +STRING +DECIMAL +NULL +BINARY +CALENDAR +ARRAY +MAP +STRUCT +UDT +DAYTIME +YEARMONTH + + NormalizeNaNAndZero Normalize NaN and zero @@ -21520,34 +21645,6 @@ are limited. NS -Expression -SQL Functions(s) -Description -Notes -Context -Param/Output -BOOLEAN -BYTE -SHORT -INT -LONG -FLOAT -DOUBLE -DATE -TIMESTAMP -STRING -DECIMAL -NULL -BINARY -CALENDAR -ARRAY -MAP -STRUCT -UDT -DAYTIME -YEARMONTH - - HiveGenericUDF Hive Generic UDF, the UDF can choose to implement a RAPIDS accelerated interface to get better performance @@ -23058,8 +23155,8 @@ dates or timestamps, or for a lack of type coercion support. S S S -S -PS
UTC is only supported TZ for TIMESTAMP
+PS
DATE is not supported by default due to compatibility
+PS
TIMESTAMP is not supported by default due to compatibility;
UTC is only supported TZ for TIMESTAMP
S S diff --git a/integration_tests/README.md b/integration_tests/README.md index 7c1486418aa..f8b6d9510ff 100644 --- a/integration_tests/README.md +++ b/integration_tests/README.md @@ -263,7 +263,7 @@ individually, so you don't risk running unit tests along with the integration te http://www.scalatest.org/user_guide/using_the_scalatest_shell ```shell -spark-shell --jars rapids-4-spark-tests_2.12-24.10.1-tests.jar,rapids-4-spark-integration-tests_2.12-24.10.1-tests.jar,scalatest_2.12-3.0.5.jar,scalactic_2.12-3.0.5.jar +spark-shell --jars rapids-4-spark-tests_2.12-24.12.0-tests.jar,rapids-4-spark-integration-tests_2.12-24.12.0-tests.jar,scalatest_2.12-3.0.5.jar,scalactic_2.12-3.0.5.jar ``` First you import the `scalatest_shell` and tell the tests where they can find the test files you @@ -286,7 +286,7 @@ If you just want to verify the SQL replacement is working you will need to add t assumes CUDA 11.0 is being used and the Spark distribution is built with Scala 2.12. ``` -$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-24.10.1-cuda11.jar" ./runtests.py +$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-24.12.0-cuda11.jar" ./runtests.py ``` You don't have to enable the plugin for this to work, the test framework will do that for you. @@ -443,7 +443,7 @@ To run cudf_udf tests, need following configuration changes: As an example, here is the `spark-submit` command with the cudf_udf parameter on CUDA 11.0: ``` -$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-24.10.1-cuda11.jar,rapids-4-spark-tests_2.12-24.10.1.jar" --conf spark.rapids.memory.gpu.allocFraction=0.3 --conf spark.rapids.python.memory.gpu.allocFraction=0.3 --conf spark.rapids.python.concurrentPythonWorkers=2 --py-files "rapids-4-spark_2.12-24.10.1-cuda11.jar" --conf spark.executorEnv.PYTHONPATH="rapids-4-spark_2.12-24.10.1-cuda11.jar" ./runtests.py --cudf_udf +$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-24.12.0-cuda11.jar,rapids-4-spark-tests_2.12-24.12.0.jar" --conf spark.rapids.memory.gpu.allocFraction=0.3 --conf spark.rapids.python.memory.gpu.allocFraction=0.3 --conf spark.rapids.python.concurrentPythonWorkers=2 --py-files "rapids-4-spark_2.12-24.12.0-cuda11.jar" --conf spark.executorEnv.PYTHONPATH="rapids-4-spark_2.12-24.12.0-cuda11.jar" ./runtests.py --cudf_udf ``` ### Enabling fuzz tests diff --git a/integration_tests/ScaleTest.md b/integration_tests/ScaleTest.md index a72125f59f9..bea34954cab 100644 --- a/integration_tests/ScaleTest.md +++ b/integration_tests/ScaleTest.md @@ -97,7 +97,7 @@ $SPARK_HOME/bin/spark-submit \ --conf spark.sql.parquet.datetimeRebaseModeInWrite=CORRECTED \ --jars $SPARK_HOME/examples/jars/scopt_2.12-3.7.1.jar \ --class com.nvidia.spark.rapids.tests.scaletest.ScaleTest \ -./target/rapids-4-spark-integration-tests_2.12-24.10.1-spark332.jar \ +./target/rapids-4-spark-integration-tests_2.12-24.12.0-spark332.jar \ 10 \ 100 \ parquet \ diff --git a/integration_tests/pom.xml b/integration_tests/pom.xml index 19fcf18ba83..bac78bce0df 100644 --- a/integration_tests/pom.xml +++ b/integration_tests/pom.xml @@ -22,11 +22,11 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.12 - 24.10.1 + 24.12.0 ../shim-deps/pom.xml rapids-4-spark-integration-tests_2.12 - 24.10.1 + 24.12.0 integration_tests diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh index 22a23349791..9bd72b2ada0 100755 --- a/integration_tests/run_pyspark_from_build.sh +++ b/integration_tests/run_pyspark_from_build.sh @@ -28,10 +28,10 @@ else [[ ! 
-x "$(command -v zip)" ]] && { echo "fail to find zip command in $PATH"; exit 1; } PY4J_TMP=("${SPARK_HOME}"/python/lib/py4j-*-src.zip) PY4J_FILE=${PY4J_TMP[0]} - # PySpark uses ".dev0" for "-SNAPSHOT", ".dev" for "preview" + # PySpark uses ".dev0" for "-SNAPSHOT" and either ".dev" for "preview" or ".devN" for "previewN" # https://github.com/apache/spark/blob/66f25e314032d562567620806057fcecc8b71f08/dev/create-release/release-build.sh#L267 VERSION_STRING=$(PYTHONPATH=${SPARK_HOME}/python:${PY4J_FILE} python -c \ - "import pyspark, re; print(re.sub('\.dev0?$', '', pyspark.__version__))" + "import pyspark, re; print(re.sub('\.dev[012]?$', '', pyspark.__version__))" ) SCALA_VERSION=`$SPARK_HOME/bin/pyspark --version 2>&1| grep Scala | awk '{split($4,v,"."); printf "%s.%s", v[1], v[2]}'` diff --git a/integration_tests/src/main/python/aqe_test.py b/integration_tests/src/main/python/aqe_test.py index 3e10f6e9148..5b3b04efdfb 100755 --- a/integration_tests/src/main/python/aqe_test.py +++ b/integration_tests/src/main/python/aqe_test.py @@ -19,7 +19,7 @@ from conftest import is_databricks_runtime, is_not_utc from data_gen import * from marks import ignore_order, allow_non_gpu -from spark_session import with_cpu_session, is_databricks113_or_later, is_before_spark_330 +from spark_session import with_cpu_session, is_databricks113_or_later, is_before_spark_330, is_databricks_version_or_later # allow non gpu when time zone is non-UTC because of https://github.com/NVIDIA/spark-rapids/issues/9653' not_utc_aqe_allow=['ShuffleExchangeExec', 'HashAggregateExec'] if is_not_utc() else [] @@ -338,6 +338,8 @@ def do_it(spark): # this should be fixed by https://github.com/NVIDIA/spark-rapids/issues/11120 aqe_join_with_dpp_fallback=["FilterExec"] if (is_databricks_runtime() or is_before_spark_330()) else [] +if is_databricks_version_or_later(14, 3): + aqe_join_with_dpp_fallback.append("CollectLimitExec") # Verify that DPP and AQE can coexist in even some odd cases involving multiple tables @ignore_order(local=True) diff --git a/integration_tests/src/main/python/cast_test.py b/integration_tests/src/main/python/cast_test.py index 044f1d46322..83cc4922b3b 100644 --- a/integration_tests/src/main/python/cast_test.py +++ b/integration_tests/src/main/python/cast_test.py @@ -18,7 +18,7 @@ from conftest import is_not_utc, is_supported_time_zone, is_dataproc_serverless_runtime from data_gen import * from spark_session import * -from marks import allow_non_gpu, approximate_float, datagen_overrides, tz_sensitive_test +from marks import allow_non_gpu, approximate_float, datagen_overrides, disable_ansi_mode, tz_sensitive_test from pyspark.sql.types import * from spark_init_internal import spark_version from datetime import date, datetime @@ -26,13 +26,27 @@ _decimal_gen_36_5 = DecimalGen(precision=36, scale=5) -def test_cast_empty_string_to_int(): + +def test_cast_empty_string_to_int_ansi_off(): assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, StringGen(pattern="")).selectExpr( 'CAST(a as BYTE)', 'CAST(a as SHORT)', 'CAST(a as INTEGER)', - 'CAST(a as LONG)')) + 'CAST(a as LONG)'), + conf=ansi_disabled_conf) + + +@pytest.mark.skip(reason="https://github.com/NVIDIA/spark-rapids/issues/11552") +def test_cast_empty_string_to_int_ansi_on(): + assert_gpu_and_cpu_error( + lambda spark : unary_op_df(spark, StringGen(pattern="")).selectExpr( + 'CAST(a as BYTE)', + 'CAST(a as SHORT)', + 'CAST(a as INTEGER)', + 'CAST(a as LONG)').collect(), + conf=ansi_enabled_conf, + error_message="cannot be cast to ") # These 
tests are not intended to be exhaustive. The scala test CastOpSuite should cover # just about everything for non-nested values. This is intended to check that the @@ -61,12 +75,22 @@ def test_cast_nested(data_gen, to_type): assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, data_gen).select(f.col('a').cast(to_type))) -def test_cast_string_date_valid_format(): +def test_cast_string_date_valid_format_ansi_off(): # In Spark 3.2.0+ the valid format changed, and we cannot support all of the format. # This provides values that are valid in all of those formats. assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, StringGen(date_start_1_1_1)).select(f.col('a').cast(DateType())), - conf = {'spark.rapids.sql.hasExtendedYearValues': 'false'}) + conf = copy_and_update(ansi_disabled_conf, {'spark.rapids.sql.hasExtendedYearValues': False})) + + +@pytest.mark.skip(reason="https://github.com/NVIDIA/spark-rapids/issues/11556") +def test_cast_string_date_valid_format_ansi_on(): + # In Spark 3.2.0+ the valid format changed, and we cannot support all formats. + # This provides values that are valid in all of those formats. + assert_gpu_and_cpu_error( + lambda spark : unary_op_df(spark, StringGen(date_start_1_1_1)).select(f.col('a').cast(DateType())).collect(), + conf = copy_and_update(ansi_enabled_conf, {'spark.rapids.sql.hasExtendedYearValues': False}), + error_message="One or more values could not be converted to DateType") invalid_values_string_to_date = ['200', ' 1970A', '1970 A', '1970T', # not conform to "yyyy" after trim '1970 T', ' 1970-01T', '1970-01 A', # not conform to "yyyy-[M]M" after trim @@ -94,8 +118,8 @@ def test_cast_string_date_invalid_ansi_before_320(): data_rows = [(v,) for v in values_string_to_data] assert_gpu_and_cpu_are_equal_collect( lambda spark: spark.createDataFrame(data_rows, "a string").select(f.col('a').cast(DateType())), - conf={'spark.rapids.sql.hasExtendedYearValues': 'false', - 'spark.sql.ansi.enabled': 'true'}, ) + conf={'spark.rapids.sql.hasExtendedYearValues': False, + 'spark.sql.ansi.enabled': True}, ) # test Spark versions >= 320 and databricks, ANSI mode, valid values @pytest.mark.skipif(is_before_spark_320(), reason="Spark versions(< 320) not support Ansi mode when casting string to date") @@ -103,8 +127,8 @@ def test_cast_string_date_valid_ansi(): data_rows = [(v,) for v in valid_values_string_to_date] assert_gpu_and_cpu_are_equal_collect( lambda spark: spark.createDataFrame(data_rows, "a string").select(f.col('a').cast(DateType())), - conf={'spark.rapids.sql.hasExtendedYearValues': 'false', - 'spark.sql.ansi.enabled': 'true'}) + conf={'spark.rapids.sql.hasExtendedYearValues': False, + 'spark.sql.ansi.enabled': True}) # test Spark versions >= 320, ANSI mode @pytest.mark.skipif(is_before_spark_320(), reason="ansi cast(string as date) throws exception only in 3.2.0+") @@ -112,8 +136,8 @@ def test_cast_string_date_valid_ansi(): def test_cast_string_date_invalid_ansi(invalid): assert_gpu_and_cpu_error( lambda spark: spark.createDataFrame([(invalid,)], "a string").select(f.col('a').cast(DateType())).collect(), - conf={'spark.rapids.sql.hasExtendedYearValues': 'false', - 'spark.sql.ansi.enabled': 'true'}, + conf={'spark.rapids.sql.hasExtendedYearValues': False, + 'spark.sql.ansi.enabled': True}, error_message="DateTimeException") @@ -144,7 +168,8 @@ def test_cast_string_date_non_ansi(): data_rows = [(v,) for v in values_string_to_data] assert_gpu_and_cpu_are_equal_collect( lambda spark: spark.createDataFrame(data_rows, "a 
string").select(f.col('a').cast(DateType())), - conf={'spark.rapids.sql.hasExtendedYearValues': 'false'}) + conf=copy_and_update(ansi_disabled_conf, {'spark.rapids.sql.hasExtendedYearValues': False})) + @pytest.mark.parametrize('data_gen', [StringGen(date_start_1_1_1), StringGen(date_start_1_1_1 + '[ |T][0-3][0-9]:[0-6][0-9]:[0-6][0-9]'), @@ -153,32 +178,65 @@ def test_cast_string_date_non_ansi(): ids=idfn) @tz_sensitive_test @allow_non_gpu(*non_utc_allow) -def test_cast_string_ts_valid_format(data_gen): +def test_cast_string_ts_valid_format_ansi_off(data_gen): # In Spark 3.2.0+ the valid format changed, and we cannot support all of the format. # This provides values that are valid in all of those formats. assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, data_gen).select(f.col('a').cast(TimestampType())), - conf = {'spark.rapids.sql.hasExtendedYearValues': 'false', - 'spark.rapids.sql.castStringToTimestamp.enabled': 'true'}) + conf = copy_and_update(ansi_disabled_conf, + {'spark.rapids.sql.hasExtendedYearValues': False, + 'spark.rapids.sql.castStringToTimestamp.enabled': True})) + + +@pytest.mark.skip(reason="https://github.com/NVIDIA/spark-rapids/issues/11556") +@pytest.mark.parametrize('data_gen', [StringGen(date_start_1_1_1)], + ids=idfn) +@tz_sensitive_test +@allow_non_gpu(*non_utc_allow) +def test_cast_string_ts_valid_format_ansi_on(data_gen): + # In Spark 3.2.0+ the valid format changed, and we cannot support all of the format. + # This provides values that are valid in all of those formats. + assert_gpu_and_cpu_are_equal_collect( + lambda spark : unary_op_df(spark, data_gen).select(f.col('a').cast(TimestampType())), + conf = copy_and_update(ansi_enabled_conf, + {'spark.rapids.sql.hasExtendedYearValues': False, + 'spark.rapids.sql.castStringToTimestamp.enabled': True})) + @allow_non_gpu('ProjectExec', 'Cast', 'Alias') @pytest.mark.skipif(is_before_spark_320(), reason="Only in Spark 3.2.0+ do we have issues with extended years") -def test_cast_string_date_fallback(): +def test_cast_string_date_fallback_ansi_off(): + """ + This tests that STRING->DATE conversion is run on CPU, via a fallback. + The point of this test is to exercise the fallback, and not to examine any errors in casting. + There is no change in behaviour between Apache Spark and the plugin, since they're both + exercising the CPU implementation. Therefore, this needn't be tested with ANSI enabled. + """ assert_gpu_fallback_collect( # Cast back to String because this goes beyond what python can support for years lambda spark : unary_op_df(spark, StringGen('([0-9]|-|\\+){4,12}')).select(f.col('a').cast(DateType()).cast(StringType())), - 'Cast') + 'Cast', + conf=ansi_disabled_conf) @allow_non_gpu('ProjectExec', 'Cast', 'Alias') @pytest.mark.skipif(is_before_spark_320(), reason="Only in Spark 3.2.0+ do we have issues with extended years") def test_cast_string_timestamp_fallback(): + """ + This tests that STRING->TIMESTAMP conversion is run on CPU, via a fallback. + The point of this test is to exercise the fallback, and not to examine any errors in casting. + There is no change in behaviour between Apache Spark and the plugin, since they're both + exercising the CPU implementation. Therefore, this needn't be tested with ANSI enabled. 
+ """ assert_gpu_fallback_collect( # Cast back to String because this goes beyond what python can support for years lambda spark : unary_op_df(spark, StringGen('([0-9]|-|\\+){4,12}')).select(f.col('a').cast(TimestampType()).cast(StringType())), 'Cast', - conf = {'spark.rapids.sql.castStringToTimestamp.enabled': 'true'}) + conf = copy_and_update(ansi_disabled_conf, + {'spark.rapids.sql.castStringToTimestamp.enabled': True})) +@disable_ansi_mode # In ANSI mode, there are restrictions to casting DECIMAL to other types. + # ANSI mode behaviour is tested in test_ansi_cast_decimal_to. @approximate_float @pytest.mark.parametrize('data_gen', [ decimal_gen_32bit, @@ -191,10 +249,10 @@ def test_cast_string_timestamp_fallback(): DecimalGen(precision=38, scale=10), DecimalGen(precision=36, scale=-5), DecimalGen(precision=38, scale=-10)], ids=meta_idfn('from:')) @pytest.mark.parametrize('to_type', [ByteType(), ShortType(), IntegerType(), LongType(), FloatType(), DoubleType(), StringType()], ids=meta_idfn('to:')) -def test_cast_decimal_to(data_gen, to_type): +def test_with_ansi_disabled_cast_decimal_to(data_gen, to_type): assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, data_gen).select(f.col('a').cast(to_type), f.col('a')), - conf = {'spark.rapids.sql.castDecimalToFloat.enabled': 'true'}) + conf = {'spark.rapids.sql.castDecimalToFloat.enabled': True}) @approximate_float @pytest.mark.parametrize('data_gen', [ @@ -210,6 +268,8 @@ def test_ansi_cast_decimal_to(data_gen, to_type): conf = {'spark.rapids.sql.castDecimalToFloat.enabled': True, 'spark.sql.ansi.enabled': True}) + +@disable_ansi_mode # With ANSI enabled, casting from wider to narrower types will fail. @datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/10050') @pytest.mark.parametrize('data_gen', [ DecimalGen(7, 1), @@ -226,10 +286,24 @@ def test_ansi_cast_decimal_to(data_gen, to_type): DecimalType(30, -4), DecimalType(38, -10), DecimalType(1, -1)], ids=meta_idfn('to:')) -def test_cast_decimal_to_decimal(data_gen, to_type): +def test_with_ansi_disabled_cast_decimal_to_decimal(data_gen, to_type): assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, data_gen).select(f.col('a').cast(to_type), f.col('a'))) + +@pytest.mark.skip(reason="https://github.com/NVIDIA/spark-rapids/issues/11550") +@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/10050') +@pytest.mark.parametrize('data_gen', [ + DecimalGen(3, 0)], ids=meta_idfn('from:')) +@pytest.mark.parametrize('to_type', [ + DecimalType(1, -1)], ids=meta_idfn('to:')) +def test_ansi_cast_failures_decimal_to_decimal(data_gen, to_type): + assert_gpu_and_cpu_error( + lambda spark : unary_op_df(spark, data_gen).select(f.col('a').cast(to_type), f.col('a')).collect(), + conf=ansi_enabled_conf, + error_message="overflow occurred") + + @pytest.mark.parametrize('data_gen', [byte_gen, short_gen, int_gen, long_gen], ids=idfn) @pytest.mark.parametrize('to_type', [ DecimalType(2, 0), @@ -240,10 +314,21 @@ def test_cast_decimal_to_decimal(data_gen, to_type): DecimalType(10, 2), DecimalType(18, 0), DecimalType(18, 2)], ids=idfn) -def test_cast_integral_to_decimal(data_gen, to_type): +def test_cast_integral_to_decimal_ansi_off(data_gen, to_type): + assert_gpu_and_cpu_are_equal_collect( + lambda spark : unary_op_df(spark, data_gen).select( + f.col('a').cast(to_type)), + conf=ansi_disabled_conf) + + +@pytest.mark.skip("https://github.com/NVIDIA/spark-rapids/issues/11550") +@pytest.mark.parametrize('data_gen', [long_gen], 
ids=idfn) +@pytest.mark.parametrize('to_type', [DecimalType(2, 0)], ids=idfn) +def test_cast_integral_to_decimal_ansi_on(data_gen, to_type): assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, data_gen).select( - f.col('a').cast(to_type))) + f.col('a').cast(to_type)), + conf=ansi_enabled_conf) def test_cast_byte_to_decimal_overflow(): assert_gpu_and_cpu_are_equal_collect( @@ -278,11 +363,28 @@ def test_cast_long_to_decimal_overflow(): DecimalType(30, 3), DecimalType(5, -3), DecimalType(3, 0)], ids=idfn) -def test_cast_floating_point_to_decimal(data_gen, to_type): +def test_cast_floating_point_to_decimal_ansi_off(data_gen, to_type): assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, data_gen).select( f.col('a'), f.col('a').cast(to_type)), - conf={'spark.rapids.sql.castFloatToDecimal.enabled': 'true'}) + conf=copy_and_update( + ansi_disabled_conf, + {'spark.rapids.sql.castFloatToDecimal.enabled': True})) + + +@pytest.mark.skip("https://github.com/NVIDIA/spark-rapids/issues/11550") +@pytest.mark.parametrize('data_gen', [FloatGen(special_cases=_float_special_cases)]) +@pytest.mark.parametrize('to_type', [DecimalType(7, 1)]) +def test_cast_floating_point_to_decimal_ansi_on(data_gen, to_type): + assert_gpu_and_cpu_error( + lambda spark : unary_op_df(spark, data_gen).select( + f.col('a'), + f.col('a').cast(to_type)).collect(), + conf=copy_and_update( + ansi_enabled_conf, + {'spark.rapids.sql.castFloatToDecimal.enabled': True}), + error_message="[NUMERIC_VALUE_OUT_OF_RANGE.WITH_SUGGESTION]") + # casting these types to string should be passed basic_gens_for_cast_to_string = [ByteGen, ShortGen, IntegerGen, LongGen, StringGen, BooleanGen, DateGen, TimestampGen] @@ -323,7 +425,7 @@ def _assert_cast_to_string_equal (data_gen, conf): @pytest.mark.parametrize('data_gen', all_array_gens_for_cast_to_string, ids=idfn) -@pytest.mark.parametrize('legacy', ['true', 'false']) +@pytest.mark.parametrize('legacy', [True, False]) @allow_non_gpu(*non_utc_allow) def test_cast_array_to_string(data_gen, legacy): _assert_cast_to_string_equal( @@ -347,18 +449,18 @@ def test_cast_double_to_string(): assert from_cpu_float == from_gpu_float @pytest.mark.parametrize('data_gen', [ArrayGen(sub) for sub in not_matched_struct_array_gens_for_cast_to_string], ids=idfn) -@pytest.mark.parametrize('legacy', ['true', 'false']) +@pytest.mark.parametrize('legacy', [True, False]) @pytest.mark.xfail(reason='casting this type to string is not exact match') def test_cast_array_with_unmatched_element_to_string(data_gen, legacy): _assert_cast_to_string_equal( data_gen, - {"spark.rapids.sql.castFloatToString.enabled" : "true", + {"spark.rapids.sql.castFloatToString.enabled" : True, "spark.sql.legacy.castComplexTypesToString.enabled": legacy} ) @pytest.mark.parametrize('data_gen', basic_map_gens_for_cast_to_string, ids=idfn) -@pytest.mark.parametrize('legacy', ['true', 'false']) +@pytest.mark.parametrize('legacy', [True, False]) @allow_non_gpu(*non_utc_allow) def test_cast_map_to_string(data_gen, legacy): _assert_cast_to_string_equal( @@ -367,18 +469,18 @@ def test_cast_map_to_string(data_gen, legacy): @pytest.mark.parametrize('data_gen', not_matched_map_gens_for_cast_to_string, ids=idfn) -@pytest.mark.parametrize('legacy', ['true', 'false']) +@pytest.mark.parametrize('legacy', [True, False]) @pytest.mark.xfail(reason='casting this type to string is not exact match') def test_cast_map_with_unmatched_element_to_string(data_gen, legacy): _assert_cast_to_string_equal( data_gen, - 
{"spark.rapids.sql.castFloatToString.enabled" : "true", + {"spark.rapids.sql.castFloatToString.enabled" : True, "spark.sql.legacy.castComplexTypesToString.enabled": legacy} ) @pytest.mark.parametrize('data_gen', [StructGen([[str(i), gen] for i, gen in enumerate(basic_array_struct_gens_for_cast_to_string)] + [["map", MapGen(ByteGen(nullable=False), null_gen)]])], ids=idfn) -@pytest.mark.parametrize('legacy', ['true', 'false']) +@pytest.mark.parametrize('legacy', [True, False]) @allow_non_gpu(*non_utc_allow) def test_cast_struct_to_string(data_gen, legacy): _assert_cast_to_string_equal( @@ -400,7 +502,7 @@ def was_broken_for_nested_null(spark): assert_gpu_and_cpu_are_equal_collect( was_broken_for_nested_null, - {"spark.sql.legacy.castComplexTypesToString.enabled": 'true' if cast_conf == 'LEGACY' else 'false'} + {"spark.sql.legacy.castComplexTypesToString.enabled": True if cast_conf == 'LEGACY' else False} ) # https://github.com/NVIDIA/spark-rapids/issues/2315 @@ -417,16 +519,16 @@ def broken_df(spark): assert_gpu_and_cpu_are_equal_collect( broken_df, - {"spark.sql.legacy.castComplexTypesToString.enabled": 'true' if cast_conf == 'LEGACY' else 'false'} + {"spark.sql.legacy.castComplexTypesToString.enabled": True if cast_conf == 'LEGACY' else False} ) @pytest.mark.parametrize('data_gen', [StructGen([["first", element_gen]]) for element_gen in not_matched_struct_array_gens_for_cast_to_string], ids=idfn) -@pytest.mark.parametrize('legacy', ['true', 'false']) +@pytest.mark.parametrize('legacy', [True, False]) @pytest.mark.xfail(reason='casting this type to string is not an exact match') def test_cast_struct_with_unmatched_element_to_string(data_gen, legacy): _assert_cast_to_string_equal( data_gen, - {"spark.rapids.sql.castFloatToString.enabled" : "true", + {"spark.rapids.sql.castFloatToString.enabled" : True, "spark.sql.legacy.castComplexTypesToString.enabled": legacy} ) @@ -481,13 +583,17 @@ def getDf(spark): # non ansi mode, will get null @pytest.mark.parametrize('type', [DoubleType(), FloatType()], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_cast_float_to_timestamp_for_nan_inf(type): +def test_with_ansi_off_cast_float_to_timestamp_for_nan_inf(type): + """ + Tests the behaviour of floats when cast to timestamp, with ANSI disabled. + ANSI mode tests are covered in test_cast_float_to_timestamp_ansi_for_nan_inf. + """ def fun(spark): data = [(float("inf"),), (float("-inf"),), (float("nan"),)] schema = StructType([StructField("value", type, True)]) df = spark.createDataFrame(data, schema) return df.select(f.col('value').cast(TimestampType())) - assert_gpu_and_cpu_are_equal_collect(fun) + assert_gpu_and_cpu_are_equal_collect(fun, conf=ansi_disabled_conf) # gen for casting long to timestamp, range is about in [0000, 9999] long_gen_to_timestamp = LongGen(max_val=math.floor((9999-1970) * 365 * 86400), @@ -554,11 +660,20 @@ def test_cast_timestamp_to_numeric_ansi_no_overflow(): "cast(value as float)", "cast(value as double)"), conf=ansi_enabled_conf) + +@pytest.mark.skipif(is_databricks_runtime() and is_databricks_version_or_later(14, 3), + reason="https://github.com/NVIDIA/spark-rapids/issues/11555") +@pytest.mark.skipif(not is_databricks_runtime() and is_spark_400_or_later(), + reason="https://github.com/NVIDIA/spark-rapids/issues/11555") def test_cast_timestamp_to_numeric_non_ansi(): + """ + Test timestamp->numeric conversions with ANSI off. 
+ """ assert_gpu_and_cpu_are_equal_collect( lambda spark: unary_op_df(spark, timestamp_gen) .selectExpr("cast(a as byte)", "cast(a as short)", "cast(a as int)", "cast(a as long)", - "cast(a as float)", "cast(a as double)")) + "cast(a as float)", "cast(a as double)"), + conf=ansi_disabled_conf) @allow_non_gpu(*non_utc_allow) def test_cast_timestamp_to_string(): @@ -735,9 +850,16 @@ def test_cast_fallback_not_UTC(from_gen, to_type): lambda spark: unary_op_df(spark, from_gen).selectExpr("CAST(a AS {}) as casted".format(to_type)), "Cast", {"spark.sql.session.timeZone": "+08", - "spark.rapids.sql.castStringToTimestamp.enabled": "true"}) + "spark.rapids.sql.castStringToTimestamp.enabled": True}) -def test_cast_date_integral_and_fp(): + +def test_cast_date_integral_and_fp_ansi_off(): + """ + This tests that a date column can be cast to different numeric/floating-point types. + This needs to be tested with ANSI disabled, because none of these conversions are + ANSI-compliant. + """ assert_gpu_and_cpu_are_equal_collect( lambda spark: unary_op_df(spark, date_gen).selectExpr( - "cast(a as boolean)", "cast(a as byte)", "cast(a as short)", "cast(a as int)", "cast(a as long)", "cast(a as float)", "cast(a as double)")) + "cast(a as boolean)", "cast(a as byte)", "cast(a as short)", "cast(a as int)", "cast(a as long)", "cast(a as float)", "cast(a as double)"), + conf=ansi_disabled_conf) diff --git a/integration_tests/src/main/python/collection_ops_test.py b/integration_tests/src/main/python/collection_ops_test.py index 099eb28c053..4aef35b0b59 100644 --- a/integration_tests/src/main/python/collection_ops_test.py +++ b/integration_tests/src/main/python/collection_ops_test.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,10 +17,11 @@ from asserts import assert_gpu_and_cpu_are_equal_collect, assert_gpu_and_cpu_error from data_gen import * from pyspark.sql.types import * + from string_test import mk_str_gen import pyspark.sql.functions as f import pyspark.sql.utils -from spark_session import with_cpu_session, with_gpu_session, is_before_spark_334, is_before_spark_351, is_before_spark_342, is_before_spark_340, is_spark_350 +from spark_session import with_cpu_session, with_gpu_session, is_before_spark_334, is_before_spark_342, is_before_spark_340, is_databricks_version_or_later, is_spark_350, is_spark_400_or_later from conftest import get_datagen_seed from marks import allow_non_gpu @@ -326,8 +327,12 @@ def test_sequence_illegal_boundaries(start_gen, stop_gen, step_gen): @pytest.mark.parametrize('stop_gen', sequence_too_long_length_gens, ids=idfn) @allow_non_gpu(*non_utc_allow) def test_sequence_too_long_sequence(stop_gen): - msg = "Too long sequence" if is_before_spark_334() or (not is_before_spark_340() and is_before_spark_342()) \ - or is_spark_350() else "Unsuccessful try to create array with" + msg = "Too long sequence" if is_before_spark_334() \ + or (not is_before_spark_340() and is_before_spark_342()) \ + or (is_spark_350() and not is_databricks_version_or_later(14, 3)) \ + else "Can't create array" if ((is_databricks_version_or_later(14, 3)) + or is_spark_400_or_later()) \ + else "Unsuccessful try to create array with" assert_gpu_and_cpu_error( # To avoid OOM, reduce the row number to 1, it is enough to verify this case. 
lambda spark:unary_op_df(spark, stop_gen, 1).selectExpr( diff --git a/integration_tests/src/main/python/conditionals_test.py b/integration_tests/src/main/python/conditionals_test.py index b95ed53f398..aaa390476a4 100644 --- a/integration_tests/src/main/python/conditionals_test.py +++ b/integration_tests/src/main/python/conditionals_test.py @@ -379,3 +379,30 @@ def test_case_when_all_then_values_are_scalars_with_nulls(): "tab", sql_without_else, conf = {'spark.rapids.sql.case_when.fuse': 'true'}) + +@pytest.mark.parametrize('combine_string_contains_enabled', [True, False]) +def test_combine_string_contains_in_case_when(combine_string_contains_enabled): + data_gen = [("c1", string_gen)] + sql = """ + SELECT + CASE + WHEN INSTR(c1, 'a') > 0 THEN 'a' + WHEN INSTR(c1, 'b') > 0 THEN 'b' + WHEN INSTR(c1, 'c') > 0 THEN 'c' + ELSE '' + END as output_1, + CASE + WHEN INSTR(c1, 'c') > 0 THEN 'c' + WHEN INSTR(c1, 'd') > 0 THEN 'd' + WHEN INSTR(c1, 'e') > 0 THEN 'e' + ELSE '' + END as output_2 + from tab + """ + # spark.rapids.sql.combined.expressions.enabled is true by default + assert_gpu_and_cpu_are_equal_sql( + lambda spark : gen_df(spark, data_gen), + "tab", + sql, + { "spark.rapids.sql.expression.combined.GpuContains" : combine_string_contains_enabled} + ) diff --git a/integration_tests/src/main/python/datasourcev2_write_test.py b/integration_tests/src/main/python/datasourcev2_write_test.py index 1f4bc133d2a..4fffd10ab44 100644 --- a/integration_tests/src/main/python/datasourcev2_write_test.py +++ b/integration_tests/src/main/python/datasourcev2_write_test.py @@ -18,7 +18,7 @@ from data_gen import gen_df, decimal_gens, non_utc_allow from marks import * from spark_session import is_hive_available, is_spark_330_or_later, with_cpu_session, with_gpu_session -from hive_parquet_write_test import _hive_bucket_gens, _hive_array_gens, _hive_struct_gens +from hive_parquet_write_test import _hive_bucket_gens_sans_bools, _hive_array_gens, _hive_struct_gens from hive_parquet_write_test import read_single_bucket _hive_write_conf = { @@ -33,9 +33,11 @@ @allow_non_gpu(*non_utc_allow) def test_write_hive_bucketed_table(spark_tmp_table_factory, file_format): num_rows = 2048 - + # Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and + # https://github.com/rapidsai/cudf/issues/6763 . 
+ # Once the first issue is fixed, add back boolean_gen def gen_table(spark): - gen_list = [('_c' + str(i), gen) for i, gen in enumerate(_hive_bucket_gens)] + gen_list = [('_c' + str(i), gen) for i, gen in enumerate(_hive_bucket_gens_sans_bools)] types_sql_str = ','.join('{} {}'.format( name, gen.data_type.simpleString()) for name, gen in gen_list) col_names_str = ','.join(name for name, gen in gen_list) diff --git a/integration_tests/src/main/python/date_time_test.py b/integration_tests/src/main/python/date_time_test.py index f83c238a70c..1a7024dac85 100644 --- a/integration_tests/src/main/python/date_time_test.py +++ b/integration_tests/src/main/python/date_time_test.py @@ -17,7 +17,7 @@ from conftest import is_utc, is_supported_time_zone, get_test_tz from data_gen import * from datetime import date, datetime, timezone -from marks import allow_non_gpu, datagen_overrides, disable_ansi_mode, ignore_order, incompat, tz_sensitive_test +from marks import allow_non_gpu, approximate_float, datagen_overrides, disable_ansi_mode, ignore_order, incompat, tz_sensitive_test from pyspark.sql.types import * from spark_session import with_cpu_session, is_before_spark_330, is_before_spark_350 import pyspark.sql.functions as f @@ -139,6 +139,39 @@ def test_datediff(data_gen): hms_fallback = ['ProjectExec'] if not is_supported_time_zone() else [] +@allow_non_gpu(*non_utc_tz_allow) +def test_months_between(): + assert_gpu_and_cpu_are_equal_collect( + lambda spark : binary_op_df(spark, timestamp_gen).selectExpr('months_between(a, b, false)')) + +@allow_non_gpu(*non_utc_tz_allow) +def test_months_between_first_day(): + assert_gpu_and_cpu_are_equal_collect( + lambda spark : unary_op_df(spark, timestamp_gen).selectExpr('months_between(a, timestamp"2024-01-01", false)')) + +@allow_non_gpu(*non_utc_tz_allow) +def test_months_between_last_day(): + assert_gpu_and_cpu_are_equal_collect( + lambda spark : unary_op_df(spark, timestamp_gen).selectExpr('months_between(a, timestamp"2023-12-31", false)')) + +@allow_non_gpu(*non_utc_tz_allow) +@approximate_float() +def test_months_between_round(): + assert_gpu_and_cpu_are_equal_collect( + lambda spark : binary_op_df(spark, timestamp_gen).selectExpr('months_between(a, b, true)')) + +@allow_non_gpu(*non_utc_tz_allow) +@approximate_float() +def test_months_between_first_day_round(): + assert_gpu_and_cpu_are_equal_collect( + lambda spark : unary_op_df(spark, timestamp_gen).selectExpr('months_between(a, timestamp"2024-01-01", true)')) + +@allow_non_gpu(*non_utc_tz_allow) +@approximate_float() +def test_months_between_last_day_round(): + assert_gpu_and_cpu_are_equal_collect( + lambda spark : unary_op_df(spark, timestamp_gen).selectExpr('months_between(a, timestamp"2023-12-31", true)')) + @allow_non_gpu(*hms_fallback) def test_hour(): assert_gpu_and_cpu_are_equal_collect( @@ -459,19 +492,39 @@ def test_to_timestamp(parser_policy): .select(f.col("a"), f.to_timestamp(f.col("a"), "yyyy-MM-dd HH:mm:ss")), { "spark.sql.legacy.timeParserPolicy": parser_policy}) +# mm: minute; MM: month @pytest.mark.skipif(not is_supported_time_zone(), reason="not all time zones are supported now, refer to https://github.com/NVIDIA/spark-rapids/issues/6839, please update after all time zones are supported") +@pytest.mark.parametrize("format", ['yyyyMMdd', 'yyyymmdd'], ids=idfn) # Test years after 1900, refer to issues: https://github.com/NVIDIA/spark-rapids/issues/11543, https://github.com/NVIDIA/spark-rapids/issues/11539 @pytest.mark.skipif(get_test_tz() != "Asia/Shanghai" and get_test_tz() != "UTC", 
reason="https://github.com/NVIDIA/spark-rapids/issues/11562") -def test_yyyyMMdd_format_for_legacy_mode(): +def test_formats_for_legacy_mode(format): gen = StringGen('(19[0-9]{2}|[2-9][0-9]{3})([0-9]{4})') assert_gpu_and_cpu_are_equal_sql( lambda spark : unary_op_df(spark, gen), "tab", - '''select unix_timestamp(a, 'yyyyMMdd'), - from_unixtime(unix_timestamp(a, 'yyyyMMdd'), 'yyyyMMdd'), - date_format(to_timestamp(a, 'yyyyMMdd'), 'yyyyMMdd') + '''select unix_timestamp(a, '{}'), + from_unixtime(unix_timestamp(a, '{}'), '{}'), + date_format(to_timestamp(a, '{}'), '{}') + from tab + '''.format(format, format, format, format, format), + {'spark.sql.legacy.timeParserPolicy': 'LEGACY', + 'spark.rapids.sql.incompatibleDateFormats.enabled': True}) + +# mm: minute; MM: month +@pytest.mark.skipif(not is_supported_time_zone(), reason="not all time zones are supported now, refer to https://github.com/NVIDIA/spark-rapids/issues/6839, please update after all time zones are supported") +@pytest.mark.skipif(get_test_tz() != "Asia/Shanghai" and get_test_tz() != "UTC", reason="https://github.com/NVIDIA/spark-rapids/issues/11562") +def test_formats_for_legacy_mode_other_formats(): + format = "yyyyMMdd HH:mm:ss" + # Test years after 1900, + gen = StringGen('(19[0-9]{2}|[2-9][0-9]{3})([0-9]{4}) [0-9]{2}:[0-9]{2}:[0-9]{2}') + assert_gpu_and_cpu_are_equal_sql( + lambda spark : unary_op_df(spark, gen), + "tab", + '''select unix_timestamp(a, '{}'), + from_unixtime(unix_timestamp(a, '{}'), '{}'), + date_format(to_timestamp(a, '{}'), '{}') from tab - ''', + '''.format(format, format, format, format, format), {'spark.sql.legacy.timeParserPolicy': 'LEGACY', 'spark.rapids.sql.incompatibleDateFormats.enabled': True}) diff --git a/integration_tests/src/main/python/dpp_test.py b/integration_tests/src/main/python/dpp_test.py index b362a4175f3..3d5ee1a5afa 100644 --- a/integration_tests/src/main/python/dpp_test.py +++ b/integration_tests/src/main/python/dpp_test.py @@ -20,7 +20,7 @@ from conftest import spark_tmp_table_factory from data_gen import * from marks import ignore_order, allow_non_gpu, datagen_overrides, disable_ansi_mode -from spark_session import is_before_spark_320, with_cpu_session, is_before_spark_312, is_databricks_runtime, is_databricks113_or_later +from spark_session import is_before_spark_320, with_cpu_session, is_before_spark_312, is_databricks_runtime, is_databricks113_or_later, is_databricks_version_or_later # non-positive values here can produce a degenerative join, so here we ensure that most values are # positive to ensure the join will produce rows. See https://github.com/NVIDIA/spark-rapids/issues/10147 @@ -167,10 +167,17 @@ def fn(spark): ''' ] +# On some Databricks versions (>=14.3), some query plans include a `CollectLimitExec`, +# when filtering partitions. This exec falls back to CPU. These tests allow for `CollectLimit` to +# run on the CPU, if everything else in the plan execute as expected. +# Further details are furnished at https://github.com/NVIDIA/spark-rapids/issues/11764. 
+dpp_fallback_execs=["CollectLimitExec"] if is_databricks_version_or_later(14,3) else [] + @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 # When BroadcastExchangeExec is available on filtering side, and it can be reused: # DynamicPruningExpression(InSubqueryExec(value, GpuSubqueryBroadcastExec))) @ignore_order +@allow_non_gpu(*dpp_fallback_execs) @datagen_overrides(seed=0, reason="https://github.com/NVIDIA/spark-rapids/issues/10147") @pytest.mark.parametrize('store_format', ['parquet', 'orc'], ids=idfn) @pytest.mark.parametrize('s_index', list(range(len(_statements))), ids=idfn) @@ -245,6 +252,7 @@ def test_dpp_bypass(spark_tmp_table_factory, store_format, s_index, aqe_enabled) # then Spark will plan an extra Aggregate to collect filtering values: # DynamicPruningExpression(InSubqueryExec(value, SubqueryExec(Aggregate(...)))) @ignore_order +@allow_non_gpu(*dpp_fallback_execs) @pytest.mark.parametrize('store_format', ['parquet', 'orc'], ids=idfn) @pytest.mark.parametrize('s_index', list(range(len(_statements))), ids=idfn) @pytest.mark.parametrize('aqe_enabled', [ @@ -285,10 +293,11 @@ def test_dpp_skip(spark_tmp_table_factory, store_format, s_index, aqe_enabled): non_exist_classes='DynamicPruningExpression', conf=dict(_dpp_fallback_conf + [('spark.sql.adaptive.enabled', aqe_enabled)])) +dpp_like_any_fallback_execs=['FilterExec', 'CollectLimitExec'] if is_databricks_version_or_later(14,3) else ['FilterExec'] # GPU verification on https://issues.apache.org/jira/browse/SPARK-34436 @ignore_order -@allow_non_gpu('FilterExec') +@allow_non_gpu(*dpp_like_any_fallback_execs) @pytest.mark.parametrize('store_format', ['parquet', 'orc'], ids=idfn) @pytest.mark.parametrize('aqe_enabled', [ 'false', @@ -327,6 +336,7 @@ def create_dim_table_for_like(spark): @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 +@allow_non_gpu(*dpp_fallback_execs) # Test handling DPP expressions from a HashedRelation that rearranges columns @pytest.mark.parametrize('aqe_enabled', [ 'false', diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py index 734b4dfb708..444e4131724 100644 --- a/integration_tests/src/main/python/hash_aggregate_test.py +++ b/integration_tests/src/main/python/hash_aggregate_test.py @@ -204,6 +204,8 @@ _decimal_gen_36_neg5 = DecimalGen(precision=36, scale=-5) _decimal_gen_38_10 = DecimalGen(precision=38, scale=10) +kudo_enabled_conf_key = "spark.rapids.shuffle.kudo.serializer.enabled" + def get_params(init_list, marked_params=[]): """ @@ -307,7 +309,8 @@ def get_params(init_list, marked_params=[]): @nightly_gpu_mem_consuming_case @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('precision', [38, 37, 36, 35, 34, 33, 32, 31], ids=idfn) -def test_hash_reduction_decimal_overflow_sum(precision): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_reduction_decimal_overflow_sum(precision, kudo_enabled): constant = '9' * precision count = pow(10, 38 - precision) assert_gpu_and_cpu_are_equal_collect( @@ -318,16 +321,20 @@ def test_hash_reduction_decimal_overflow_sum(precision): # run out of memory in some setups. These should not happen in production, because # we really are just doing a really bad job at multiplying to get this result so # some optimizations are conspiring against us. 
- conf = {'spark.rapids.sql.batchSizeBytes': '128m'}) + conf = {'spark.rapids.sql.batchSizeBytes': '128m', + kudo_enabled_conf_key: kudo_enabled}) @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', [_longs_with_nulls], ids=idfn) @pytest.mark.parametrize('override_split_until_size', [None, 1], ids=idfn) @pytest.mark.parametrize('override_batch_size_bytes', [None, 1], ids=idfn) -def test_hash_grpby_sum_count_action(data_gen, override_split_until_size, override_batch_size_bytes): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_grpby_sum_count_action(data_gen, override_split_until_size, + override_batch_size_bytes, kudo_enabled): conf = { - 'spark.rapids.sql.test.overrides.splitUntilSize': override_split_until_size + 'spark.rapids.sql.test.overrides.splitUntilSize': override_split_until_size, + kudo_enabled_conf_key: kudo_enabled } if override_batch_size_bytes is not None: conf["spark.rapids.sql.batchSizeBytes"] = override_batch_size_bytes @@ -340,23 +347,29 @@ def test_hash_grpby_sum_count_action(data_gen, override_split_until_size, overri @allow_non_gpu("SortAggregateExec", "SortExec", "ShuffleExchangeExec") @ignore_order @pytest.mark.parametrize('data_gen', _grpkey_nested_structs_with_array_basic_child + _grpkey_list_with_non_nested_children, ids=idfn) -def test_hash_grpby_list_min_max(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_grpby_list_min_max(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( - lambda spark: gen_df(spark, data_gen, length=100).coalesce(1).groupby('a').agg(f.min('b'), f.max('b')) - ) + lambda spark: gen_df(spark, data_gen, length=100).coalesce(1).groupby('a').agg(f.min( + 'b'), f.max('b')), + conf = {kudo_enabled_conf_key: kudo_enabled}) @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', [_longs_with_nulls], ids=idfn) -def test_hash_reduction_sum_count_action(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_reduction_sum_count_action(data_gen, kudo_enabled): assert_gpu_and_cpu_row_counts_equal( - lambda spark: gen_df(spark, data_gen, length=100).agg(f.sum('b')) + lambda spark: gen_df(spark, data_gen, length=100).agg(f.sum('b')), + conf = {kudo_enabled_conf_key: kudo_enabled} ) # Make sure that we can do computation in the group by columns @ignore_order @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 -def test_computation_in_grpby_columns(): - conf = {'spark.rapids.sql.batchSizeBytes' : '250'} +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_computation_in_grpby_columns(kudo_enabled): + conf = {'spark.rapids.sql.batchSizeBytes' : '250', + kudo_enabled_conf_key: kudo_enabled} data_gen = [ ('a', RepeatSeqGen(StringGen('a{1,20}'), length=50)), ('b', short_gen)] @@ -371,10 +384,12 @@ def test_computation_in_grpby_columns(): @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', _init_list_with_decimalbig, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) -def test_hash_grpby_sum(data_gen, conf): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_grpby_sum(data_gen, conf, kudo_enabled): + new_conf = copy_and_update(conf, {kudo_enabled_conf_key: kudo_enabled}) assert_gpu_and_cpu_are_equal_collect( lambda spark: gen_df(spark, 
data_gen, length=100).groupby('a').agg(f.sum('b')), - conf = conf) + conf = new_conf) @shuffle_test @approximate_float @@ -383,10 +398,12 @@ def test_hash_grpby_sum(data_gen, conf): @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', [_grpkey_short_sum_full_decimals, _grpkey_short_sum_full_neg_scale_decimals], ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) -def test_hash_grpby_sum_full_decimal(data_gen, conf): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_grpby_sum_full_decimal(data_gen, conf, kudo_enabled): + new_conf = copy_and_update(conf, {kudo_enabled_conf_key: kudo_enabled}) assert_gpu_and_cpu_are_equal_collect( lambda spark: gen_df(spark, data_gen, length=100).groupby('a').agg(f.sum('b')), - conf = conf) + conf = new_conf) @approximate_float @datagen_overrides(seed=0, reason="https://github.com/NVIDIA/spark-rapids/issues/9822") @@ -394,10 +411,12 @@ def test_hash_grpby_sum_full_decimal(data_gen, conf): @incompat @pytest.mark.parametrize('data_gen', numeric_gens + decimal_gens + [DecimalGen(precision=36, scale=5)], ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) -def test_hash_reduction_sum(data_gen, conf): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_reduction_sum(data_gen, conf, kudo_enabled): + new_conf = copy_and_update(conf, {kudo_enabled_conf_key: kudo_enabled}) assert_gpu_and_cpu_are_equal_collect( lambda spark: unary_op_df(spark, data_gen, length=100).selectExpr("SUM(a)"), - conf = conf) + conf = new_conf) @approximate_float @ignore_order @@ -406,11 +425,13 @@ def test_hash_reduction_sum(data_gen, conf): @pytest.mark.parametrize('data_gen', numeric_gens + decimal_gens + [ DecimalGen(precision=38, scale=0), DecimalGen(precision=38, scale=-10)], ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @datagen_overrides(seed=0, permanent=True, reason='https://github.com/NVIDIA/spark-rapids/issues/9779') -def test_hash_reduction_sum_full_decimal(data_gen, conf): +def test_hash_reduction_sum_full_decimal(data_gen, conf, kudo_enabled): + new_conf = copy_and_update(conf, {kudo_enabled_conf_key: kudo_enabled}) assert_gpu_and_cpu_are_equal_collect( lambda spark: unary_op_df(spark, data_gen, length=100).selectExpr("SUM(a)"), - conf = conf) + conf = new_conf) @approximate_float @ignore_order @@ -419,10 +440,12 @@ def test_hash_reduction_sum_full_decimal(data_gen, conf): @pytest.mark.parametrize('data_gen', _init_list + [_grpkey_short_mid_decimals, _grpkey_short_big_decimals, _grpkey_short_very_big_decimals, _grpkey_short_sum_full_decimals], ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) -def test_hash_grpby_avg(data_gen, conf): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_grpby_avg(data_gen, conf, kudo_enabled): + new_conf = copy_and_update(conf, {kudo_enabled_conf_key: kudo_enabled}) assert_gpu_and_cpu_are_equal_collect( lambda spark: gen_df(spark, data_gen, length=200).groupby('a').agg(f.avg('b')), - conf=conf + conf=new_conf ) # tracks https://github.com/NVIDIA/spark-rapids/issues/154 @@ -438,30 +461,38 @@ def test_hash_grpby_avg(data_gen, conf): @pytest.mark.parametrize('data_gen', [ StructGen(children=[('a', int_gen), ('b', 
int_gen)],nullable=False, special_cases=[((None, None), 400.0), ((None, -1542301795), 100.0)])], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @pytest.mark.xfail(condition=is_databricks104_or_later(), reason='https://github.com/NVIDIA/spark-rapids/issues/4963') -def test_hash_avg_nulls_partial_only(data_gen): +def test_hash_avg_nulls_partial_only(data_gen, kudo_enabled): + conf = copy_and_update(_float_conf_partial, {kudo_enabled_conf_key: kudo_enabled}) assert_gpu_and_cpu_are_equal_collect( lambda spark: gen_df(spark, data_gen, length=2).agg(f.avg('b')), - conf=_float_conf_partial - ) + conf=conf) @approximate_float @ignore_order @incompat @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', _init_list_with_decimalbig, ids=idfn) -def test_intersect_all(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_intersect_all(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( - lambda spark : gen_df(spark, data_gen, length=100).intersectAll(gen_df(spark, data_gen, length=100))) + lambda spark : gen_df(spark, data_gen, length=100).intersectAll(gen_df(spark, data_gen, + length=100)), + conf = {kudo_enabled_conf_key: kudo_enabled}) @approximate_float @ignore_order @incompat @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', _init_list_with_decimalbig, ids=idfn) -def test_exceptAll(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_exceptAll(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( - lambda spark : gen_df(spark, data_gen, length=100).exceptAll(gen_df(spark, data_gen, length=100).filter('a != b'))) + lambda spark : (gen_df(spark, data_gen, length=100) + .exceptAll(gen_df(spark, data_gen, length=100) + .filter('a != b'))), + conf = {kudo_enabled_conf_key: kudo_enabled}) # Spark fails to sort some decimal values due to overflow when calculating the sorting prefix. 
# See https://issues.apache.org/jira/browse/SPARK-40129 @@ -488,13 +519,14 @@ def test_exceptAll(data_gen): @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', _pivot_gens_with_decimals, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) -def test_hash_grpby_pivot(data_gen, conf): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_grpby_pivot(data_gen, conf, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( lambda spark: gen_df(spark, data_gen, length=100) .groupby('a') .pivot('b') .agg(f.sum('c')), - conf = conf) + conf = copy_and_update(conf, {kudo_enabled_conf_key: kudo_enabled})) @approximate_float @ignore_order(local=True) @@ -503,13 +535,14 @@ def test_hash_grpby_pivot(data_gen, conf): @pytest.mark.parametrize('data_gen', _init_list, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) @datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/10062') -def test_hash_multiple_grpby_pivot(data_gen, conf): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_multiple_grpby_pivot(data_gen, conf, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( lambda spark: gen_df(spark, data_gen, length=100) .groupby('a','b') .pivot('b') .agg(f.sum('c'), f.max('c')), - conf=conf) + conf=copy_and_update(conf, {kudo_enabled_conf_key: kudo_enabled})) @approximate_float @ignore_order(local=True) @@ -517,13 +550,14 @@ def test_hash_multiple_grpby_pivot(data_gen, conf): @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', _init_list, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) -def test_hash_reduction_pivot(data_gen, conf): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_reduction_pivot(data_gen, conf, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( lambda spark: gen_df(spark, data_gen, length=100) .groupby() .pivot('b') .agg(f.sum('c')), - conf=conf) + conf=copy_and_update(conf, {kudo_enabled_conf_key: kudo_enabled})) @approximate_float @@ -533,7 +567,8 @@ def test_hash_reduction_pivot(data_gen, conf): @incompat @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', [_grpkey_floats_with_nulls_and_nans], ids=idfn) -def test_hash_pivot_groupby_duplicates_fallback(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_pivot_groupby_duplicates_fallback(data_gen, kudo_enabled): # PivotFirst will not work on the GPU when pivot_values has duplicates assert_gpu_fallback_collect( lambda spark: gen_df(spark, data_gen, length=100) @@ -541,7 +576,7 @@ def test_hash_pivot_groupby_duplicates_fallback(data_gen): .pivot('b', ['10.0', '10.0']) .agg(f.sum('c')), "PivotFirst", - conf=_float_conf) + conf=copy_and_update(_float_conf, {kudo_enabled_conf_key: kudo_enabled}) ) _repeat_agg_column_for_collect_op = [ RepeatSeqGen(BooleanGen(), length=15), @@ -610,43 +645,53 @@ def test_hash_pivot_groupby_duplicates_fallback(data_gen): @ignore_order(local=True) @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', [decimal_gen_128bit], ids=idfn) -def test_decimal128_count_reduction(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def 
test_decimal128_count_reduction(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( - lambda spark: unary_op_df(spark, data_gen).selectExpr('count(a)')) + lambda spark: unary_op_df(spark, data_gen).selectExpr('count(a)'), + conf = {kudo_enabled_conf_key: kudo_enabled}) # very simple test for just a count on decimals 128 values until we can support more with them @ignore_order(local=True) @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', [decimal_gen_128bit], ids=idfn) -def test_decimal128_count_group_by(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_decimal128_count_group_by(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( lambda spark: two_col_df(spark, byte_gen, data_gen) .groupby('a') - .agg(f.count('b'))) + .agg(f.count('b')), + conf = {kudo_enabled_conf_key: kudo_enabled}) # very simple test for just a min/max on decimals 128 values until we can support more with them @ignore_order(local=True) @pytest.mark.parametrize('data_gen', [decimal_gen_128bit], ids=idfn) -def test_decimal128_min_max_reduction(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_decimal128_min_max_reduction(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( - lambda spark: unary_op_df(spark, data_gen).selectExpr('min(a)', 'max(a)')) + lambda spark: unary_op_df(spark, data_gen).selectExpr('min(a)', 'max(a)'), + conf = {kudo_enabled_conf_key: kudo_enabled}) # very simple test for just a min/max on decimals 128 values until we can support more with them @ignore_order(local=True) @pytest.mark.parametrize('data_gen', [decimal_gen_128bit], ids=idfn) -def test_decimal128_min_max_group_by(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_decimal128_min_max_group_by(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( lambda spark: two_col_df(spark, byte_gen, data_gen) .groupby('a') - .agg(f.min('b'), f.max('b'))) + .agg(f.min('b'), f.max('b')), + conf = {kudo_enabled_conf_key: kudo_enabled}) @ignore_order(local=True) @pytest.mark.parametrize('data_gen', _all_basic_gens_with_all_nans_cases, ids=idfn) -def test_min_max_group_by(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_min_max_group_by(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( lambda spark: two_col_df(spark, byte_gen, data_gen) .groupby('a') - .agg(f.min('b'), f.max('b'))) + .agg(f.min('b'), f.max('b')), + conf = {kudo_enabled_conf_key: kudo_enabled}) # To avoid ordering issues with collect_list, sorting the arrays that are returned. 
# NOTE: sorting the arrays locally, because sort_array() does not yet @@ -657,18 +702,21 @@ def test_min_max_group_by(data_gen): @ignore_order(local=True, arrays=["blist"]) @pytest.mark.parametrize('data_gen', _gen_data_for_collect_list_op, ids=idfn) @pytest.mark.parametrize('use_obj_hash_agg', [True, False], ids=idfn) -def test_hash_groupby_collect_list(data_gen, use_obj_hash_agg): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_groupby_collect_list(data_gen, use_obj_hash_agg, kudo_enabled): def doit(spark): return gen_df(spark, data_gen, length=100)\ .groupby('a')\ .agg(f.collect_list('b').alias("blist")) assert_gpu_and_cpu_are_equal_collect( doit, - conf={'spark.sql.execution.useObjectHashAggregateExec': str(use_obj_hash_agg).lower()}) + conf={'spark.sql.execution.useObjectHashAggregateExec': str(use_obj_hash_agg).lower(), + kudo_enabled_conf_key: kudo_enabled}) @ignore_order(local=True) @pytest.mark.parametrize('use_obj_hash_agg', [True, False], ids=idfn) -def test_hash_groupby_collect_list_of_maps(use_obj_hash_agg): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_groupby_collect_list_of_maps(use_obj_hash_agg, kudo_enabled): gens = [("a", RepeatSeqGen(LongGen(), length=20)), ("b", simple_string_to_string_map_gen)] def doit(spark): df = gen_df(spark, gens, length=100) \ @@ -680,27 +728,32 @@ def doit(spark): return spark.createDataFrame(df.rdd, schema=df.schema).select("a", f.explode("blist")) assert_gpu_and_cpu_are_equal_collect( doit, - conf={'spark.sql.execution.useObjectHashAggregateExec': str(use_obj_hash_agg).lower()}) + conf={'spark.sql.execution.useObjectHashAggregateExec': str(use_obj_hash_agg).lower(), + kudo_enabled_conf_key: kudo_enabled}) @ignore_order(local=True) @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', _full_gen_data_for_collect_op, ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_hash_groupby_collect_set(data_gen): +def test_hash_groupby_collect_set(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( lambda spark: gen_df(spark, data_gen, length=100) .groupby('a') - .agg(f.sort_array(f.collect_set('b')), f.count('b'))) + .agg(f.sort_array(f.collect_set('b')), f.count('b')), + conf = {kudo_enabled_conf_key: kudo_enabled}) @ignore_order(local=True) @pytest.mark.parametrize('data_gen', _gen_data_for_collect_set_op, ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_hash_groupby_collect_set_on_nested_type(data_gen): +def test_hash_groupby_collect_set_on_nested_type(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( lambda spark: gen_df(spark, data_gen, length=100) .groupby('a') - .agg(f.sort_array(f.collect_set('b')))) + .agg(f.sort_array(f.collect_set('b'))), + conf= {kudo_enabled_conf_key: kudo_enabled}) # NOTE: sorting the arrays locally, because sort_array() does not yet @@ -710,9 +763,11 @@ def test_hash_groupby_collect_set_on_nested_type(data_gen): @ignore_order(local=True, arrays=["collect_set"]) @allow_non_gpu("ProjectExec", *non_utc_allow) @pytest.mark.parametrize('data_gen', _gen_data_for_collect_set_op_nested, ids=idfn) -def test_hash_groupby_collect_set_on_nested_array_type(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_groupby_collect_set_on_nested_array_type(data_gen, kudo_enabled): conf = 
copy_and_update(_float_conf, { "spark.rapids.sql.castFloatToString.enabled": "true", + kudo_enabled_conf_key: kudo_enabled }) def do_it(spark): @@ -726,19 +781,23 @@ def do_it(spark): @ignore_order(local=True) @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', _full_gen_data_for_collect_op, ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_hash_reduction_collect_set(data_gen): +def test_hash_reduction_collect_set(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( lambda spark: gen_df(spark, data_gen, length=100) - .agg(f.sort_array(f.collect_set('b')), f.count('b'))) + .agg(f.sort_array(f.collect_set('b')), f.count('b')), + conf = {kudo_enabled_conf_key: kudo_enabled}) @ignore_order(local=True) @pytest.mark.parametrize('data_gen', _gen_data_for_collect_set_op, ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_hash_reduction_collect_set_on_nested_type(data_gen): +def test_hash_reduction_collect_set_on_nested_type(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( lambda spark: gen_df(spark, data_gen, length=100) - .agg(f.sort_array(f.collect_set('b')))) + .agg(f.sort_array(f.collect_set('b'))), + conf= {kudo_enabled_conf_key: kudo_enabled}) # NOTE: sorting the arrays locally, because sort_array() does not yet @@ -748,9 +807,11 @@ def test_hash_reduction_collect_set_on_nested_type(data_gen): @ignore_order(local=True, arrays=["collect_set"]) @allow_non_gpu("ProjectExec", *non_utc_allow) @pytest.mark.parametrize('data_gen', _gen_data_for_collect_set_op_nested, ids=idfn) -def test_hash_reduction_collect_set_on_nested_array_type(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_reduction_collect_set_on_nested_array_type(data_gen, kudo_enabled): conf = copy_and_update(_float_conf, { "spark.rapids.sql.castFloatToString.enabled": "true", + kudo_enabled_conf_key: kudo_enabled }) def do_it(spark): @@ -763,8 +824,9 @@ def do_it(spark): @ignore_order(local=True) @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', _full_gen_data_for_collect_op, ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_hash_groupby_collect_with_single_distinct(data_gen): +def test_hash_groupby_collect_with_single_distinct(data_gen, kudo_enabled): # test collect_ops with other distinct aggregations assert_gpu_and_cpu_are_equal_collect( lambda spark: gen_df(spark, data_gen, length=100) @@ -772,7 +834,8 @@ def test_hash_groupby_collect_with_single_distinct(data_gen): .agg(f.sort_array(f.collect_list('b')), f.sort_array(f.collect_set('b')), f.countDistinct('c'), - f.count('c'))) + f.count('c')), + conf = {kudo_enabled_conf_key: kudo_enabled}) def hash_groupby_single_distinct_collect_impl(data_gen, conf): @@ -798,41 +861,46 @@ def hash_groupby_single_distinct_collect_impl(data_gen, conf): @ignore_order(local=True) @pytest.mark.parametrize('data_gen', _gen_data_for_collect_op, ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_hash_groupby_single_distinct_collect(data_gen): +def test_hash_groupby_single_distinct_collect(data_gen, kudo_enabled): """ Tests distinct collect, with ANSI disabled. 
The corresponding ANSI-enabled condition is tested in test_hash_groupby_single_distinct_collect_ansi_enabled """ - ansi_disabled_conf = {'spark.sql.ansi.enabled': False} + ansi_disabled_conf = {'spark.sql.ansi.enabled': False, + kudo_enabled_conf_key: kudo_enabled} hash_groupby_single_distinct_collect_impl(data_gen=data_gen, conf=ansi_disabled_conf) @ignore_order(local=True) @pytest.mark.parametrize('data_gen', [_gen_data_for_collect_op[0]], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) @allow_non_gpu('ObjectHashAggregateExec', 'ShuffleExchangeExec') -def test_hash_groupby_single_distinct_collect_ansi_enabled(data_gen): +def test_hash_groupby_single_distinct_collect_ansi_enabled(data_gen, kudo_enabled): """ Tests distinct collect, with ANSI enabled. Enabling ANSI mode causes the plan to include ObjectHashAggregateExec, which runs on CPU. """ - hash_groupby_single_distinct_collect_impl(data_gen=data_gen, conf=ansi_enabled_conf) + hash_groupby_single_distinct_collect_impl(data_gen=data_gen, + conf=copy_and_update(ansi_enabled_conf, {kudo_enabled_conf_key: kudo_enabled})) @ignore_order(local=True) @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', _gen_data_for_collect_op, ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_hash_groupby_collect_with_multi_distinct(data_gen): +def test_hash_groupby_collect_with_multi_distinct(data_gen, kudo_enabled): def spark_fn(spark_session): return gen_df(spark_session, data_gen, length=100).groupby('a').agg( f.sort_array(f.collect_list('b')), f.sort_array(f.collect_set('b')), f.countDistinct('b'), f.countDistinct('c')) - assert_gpu_and_cpu_are_equal_collect(spark_fn) + assert_gpu_and_cpu_are_equal_collect(spark_fn, conf = {kudo_enabled_conf_key: kudo_enabled}) _replace_modes_non_distinct = [ # Spark: GPU(Final) -> CPU(Partial) @@ -851,13 +919,16 @@ def spark_fn(spark_session): @pytest.mark.parametrize('replace_mode', _replace_modes_non_distinct, ids=idfn) @pytest.mark.parametrize('aqe_enabled', ['false', 'true'], ids=idfn) @pytest.mark.parametrize('use_obj_hash_agg', ['false', 'true'], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) def test_hash_groupby_collect_partial_replace_fallback(data_gen, replace_mode, aqe_enabled, - use_obj_hash_agg): + use_obj_hash_agg, + kudo_enabled): conf = {'spark.rapids.sql.hashAgg.replaceMode': replace_mode, 'spark.sql.adaptive.enabled': aqe_enabled, - 'spark.sql.execution.useObjectHashAggregateExec': use_obj_hash_agg} + 'spark.sql.execution.useObjectHashAggregateExec': use_obj_hash_agg, + kudo_enabled_conf_key: kudo_enabled} cpu_clz, gpu_clz = ['CollectList', 'CollectSet'], ['GpuCollectList', 'GpuCollectSet'] exist_clz, non_exist_clz = [], [] @@ -901,14 +972,17 @@ def test_hash_groupby_collect_partial_replace_fallback(data_gen, @pytest.mark.parametrize('replace_mode', _replace_modes_single_distinct, ids=idfn) @pytest.mark.parametrize('aqe_enabled', ['false', 'true'], ids=idfn) @pytest.mark.parametrize('use_obj_hash_agg', ['false', 'true'], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @pytest.mark.xfail(condition=is_databricks104_or_later(), reason='https://github.com/NVIDIA/spark-rapids/issues/4963') def test_hash_groupby_collect_partial_replace_with_distinct_fallback(data_gen, replace_mode, aqe_enabled, - use_obj_hash_agg): + use_obj_hash_agg, + kudo_enabled): 
conf = {'spark.rapids.sql.hashAgg.replaceMode': replace_mode, 'spark.sql.adaptive.enabled': aqe_enabled, - 'spark.sql.execution.useObjectHashAggregateExec': use_obj_hash_agg} + 'spark.sql.execution.useObjectHashAggregateExec': use_obj_hash_agg, + kudo_enabled_conf_key: kudo_enabled} # test with single Distinct assert_cpu_and_gpu_are_equal_collect_with_capture( lambda spark: gen_df(spark, data_gen, length=100) @@ -975,10 +1049,11 @@ def exact_percentile_reduction(df): @datagen_overrides(seed=0, reason="https://github.com/NVIDIA/spark-rapids/issues/10233") @pytest.mark.parametrize('data_gen', exact_percentile_reduction_data_gen, ids=idfn) -def test_exact_percentile_reduction(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_exact_percentile_reduction(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( - lambda spark: exact_percentile_reduction(gen_df(spark, data_gen)) - ) + lambda spark: exact_percentile_reduction(gen_df(spark, data_gen)), + conf = {kudo_enabled_conf_key: kudo_enabled}) exact_percentile_reduction_cpu_fallback_data_gen = [ [('val', data_gen), @@ -992,9 +1067,10 @@ def test_exact_percentile_reduction(data_gen): @pytest.mark.parametrize('data_gen', exact_percentile_reduction_cpu_fallback_data_gen, ids=idfn) @pytest.mark.parametrize('replace_mode', ['partial', 'final|complete'], ids=idfn) @pytest.mark.parametrize('use_obj_hash_agg', ['false', 'true'], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @pytest.mark.xfail(condition=is_databricks104_or_later(), reason='https://github.com/NVIDIA/spark-rapids/issues/9494') def test_exact_percentile_reduction_partial_fallback_to_cpu(data_gen, replace_mode, - use_obj_hash_agg): + use_obj_hash_agg, kudo_enabled): cpu_clz, gpu_clz = ['Percentile'], ['GpuPercentileDefault'] exist_clz, non_exist_clz = [], [] # For aggregations without distinct, Databricks runtime removes the partial Aggregate stage ( @@ -1017,7 +1093,8 @@ def test_exact_percentile_reduction_partial_fallback_to_cpu(data_gen, replace_m exist_classes=','.join(exist_clz), non_exist_classes=','.join(non_exist_clz), conf={'spark.rapids.sql.hashAgg.replaceMode': replace_mode, - 'spark.sql.execution.useObjectHashAggregateExec': use_obj_hash_agg} + 'spark.sql.execution.useObjectHashAggregateExec': use_obj_hash_agg, + kudo_enabled_conf_key: kudo_enabled} ) @@ -1051,10 +1128,11 @@ def exact_percentile_groupby(df): @ignore_order @pytest.mark.parametrize('data_gen', exact_percentile_groupby_data_gen, ids=idfn) -def test_exact_percentile_groupby(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_exact_percentile_groupby(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( - lambda spark: exact_percentile_groupby(gen_df(spark, data_gen)) - ) + lambda spark: exact_percentile_groupby(gen_df(spark, data_gen)), + conf = {kudo_enabled_conf_key: kudo_enabled}) exact_percentile_groupby_cpu_fallback_data_gen = [ [('key', RepeatSeqGen(IntegerGen(), length=100)), @@ -1070,8 +1148,10 @@ def test_exact_percentile_groupby(data_gen): @pytest.mark.parametrize('data_gen', exact_percentile_groupby_cpu_fallback_data_gen, ids=idfn) @pytest.mark.parametrize('replace_mode', ['partial', 'final|complete'], ids=idfn) @pytest.mark.parametrize('use_obj_hash_agg', ['false', 'true'], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @pytest.mark.xfail(condition=is_databricks104_or_later(), reason='https://github.com/NVIDIA/spark-rapids/issues/9494') 
-def test_exact_percentile_groupby_partial_fallback_to_cpu(data_gen, replace_mode, use_obj_hash_agg): +def test_exact_percentile_groupby_partial_fallback_to_cpu(data_gen, replace_mode, + use_obj_hash_agg, kudo_enabled): cpu_clz, gpu_clz = ['Percentile'], ['GpuPercentileDefault'] exist_clz, non_exist_clz = [], [] # For aggregations without distinct, Databricks runtime removes the partial Aggregate stage ( @@ -1094,15 +1174,16 @@ def test_exact_percentile_groupby_partial_fallback_to_cpu(data_gen, replace_mode exist_classes=','.join(exist_clz), non_exist_classes=','.join(non_exist_clz), conf={'spark.rapids.sql.hashAgg.replaceMode': replace_mode, - 'spark.sql.execution.useObjectHashAggregateExec': use_obj_hash_agg} - ) + 'spark.sql.execution.useObjectHashAggregateExec': use_obj_hash_agg, + kudo_enabled_conf_key: kudo_enabled}) @ignore_order(local=True) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu('ObjectHashAggregateExec', 'ShuffleExchangeExec', 'HashAggregateExec', 'HashPartitioning', 'ApproximatePercentile', 'Alias', 'Literal', 'AggregateExpression') -def test_hash_groupby_typed_imperative_agg_without_gpu_implementation_fallback(): +def test_hash_groupby_typed_imperative_agg_without_gpu_implementation_fallback(kudo_enabled): assert_cpu_and_gpu_are_equal_sql_with_capture( lambda spark: gen_df(spark, [('k', RepeatSeqGen(LongGen(), length=20)), ('v', UniqueLongGen())], length=100), @@ -1110,7 +1191,8 @@ def test_hash_groupby_typed_imperative_agg_without_gpu_implementation_fallback() non_exist_classes='GpuApproximatePercentile,GpuObjectHashAggregateExec', table_name='table', sql="""select k, - approx_percentile(v, array(0.25, 0.5, 0.75)) from table group by k""") + approx_percentile(v, array(0.25, 0.5, 0.75)) from table group by k""", + conf = {kudo_enabled_conf_key: kudo_enabled}) @approximate_float @ignore_order @@ -1118,7 +1200,8 @@ def test_hash_groupby_typed_imperative_agg_without_gpu_implementation_fallback() @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', _init_list, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) -def test_hash_multiple_mode_query(data_gen, conf): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_multiple_mode_query(data_gen, conf, kudo_enabled): print_params(data_gen) assert_gpu_and_cpu_are_equal_collect( lambda spark: gen_df(spark, data_gen, length=100) @@ -1132,7 +1215,7 @@ def test_hash_multiple_mode_query(data_gen, conf): f.max('a'), f.sumDistinct('b'), f.countDistinct('c') - ), conf=conf) + ), conf=copy_and_update(conf, {kudo_enabled_conf_key: kudo_enabled})) @approximate_float @@ -1143,11 +1226,12 @@ def test_hash_multiple_mode_query(data_gen, conf): @pytest.mark.parametrize('data_gen', _init_list, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) -def test_hash_multiple_mode_query_avg_distincts(data_gen, conf): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_multiple_mode_query_avg_distincts(data_gen, conf, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( lambda spark: gen_df(spark, data_gen, length=100) .selectExpr('avg(distinct a)', 'avg(distinct b)','avg(distinct c)'), - conf=conf) + conf=copy_and_update(conf, {kudo_enabled_conf_key: kudo_enabled})) @approximate_float @@ -1157,8 +1241,11 @@ def test_hash_multiple_mode_query_avg_distincts(data_gen, conf): @disable_ansi_mode # 
https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', _init_list, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) -def test_hash_query_multiple_distincts_with_non_distinct(data_gen, conf): - local_conf = copy_and_update(conf, {'spark.sql.legacy.allowParameterlessCount': 'true'}) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_query_multiple_distincts_with_non_distinct(data_gen, conf, kudo_enabled): + local_conf = copy_and_update(conf, + {'spark.sql.legacy.allowParameterlessCount': 'true', + kudo_enabled_conf_key: kudo_enabled}) assert_gpu_and_cpu_are_equal_sql( lambda spark : gen_df(spark, data_gen, length=100), "hash_agg_table", @@ -1181,8 +1268,10 @@ def test_hash_query_multiple_distincts_with_non_distinct(data_gen, conf): @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', _init_list, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) -def test_hash_query_max_with_multiple_distincts(data_gen, conf): - local_conf = copy_and_update(conf, {'spark.sql.legacy.allowParameterlessCount': 'true'}) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_query_max_with_multiple_distincts(data_gen, conf, kudo_enabled): + local_conf = copy_and_update(conf, {'spark.sql.legacy.allowParameterlessCount': 'true', + kudo_enabled_conf_key: kudo_enabled}) assert_gpu_and_cpu_are_equal_sql( lambda spark : gen_df(spark, data_gen, length=100), "hash_agg_table", @@ -1196,11 +1285,12 @@ def test_hash_query_max_with_multiple_distincts(data_gen, conf): @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', _init_list, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) -def test_hash_count_with_filter(data_gen, conf): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_count_with_filter(data_gen, conf, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( lambda spark: gen_df(spark, data_gen, length=100) .selectExpr('count(a) filter (where c > 50)'), - conf=conf) + conf=copy_and_update(conf, {kudo_enabled_conf_key: kudo_enabled})) @approximate_float @@ -1209,7 +1299,8 @@ def test_hash_count_with_filter(data_gen, conf): @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', _init_list + [_grpkey_short_mid_decimals, _grpkey_short_big_decimals], ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) -def test_hash_multiple_filters(data_gen, conf): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_multiple_filters(data_gen, conf, kudo_enabled): assert_gpu_and_cpu_are_equal_sql( lambda spark : gen_df(spark, data_gen, length=100), "hash_agg_table", @@ -1217,15 +1308,17 @@ def test_hash_multiple_filters(data_gen, conf): 'count(b) filter (where c > 100),' + 'avg(b) filter (where b > 20),' + 'min(a), max(b) filter (where c > 250) from hash_agg_table group by a', - conf) + conf = copy_and_update(conf, {kudo_enabled_conf_key: kudo_enabled})) @approximate_float @ignore_order @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', [_grpkey_floats_with_nan_zero_grouping_keys, _grpkey_doubles_with_nan_zero_grouping_keys], ids=idfn) -def 
test_hash_agg_with_nan_keys(data_gen): - local_conf = copy_and_update(_float_conf, {'spark.sql.legacy.allowParameterlessCount': 'true'}) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_agg_with_nan_keys(data_gen, kudo_enabled): + local_conf = copy_and_update(_float_conf, {'spark.sql.legacy.allowParameterlessCount': 'true', + kudo_enabled_conf_key: kudo_enabled}) assert_gpu_and_cpu_are_equal_sql( lambda spark : gen_df(spark, data_gen, length=1024), "hash_agg_table", @@ -1245,8 +1338,10 @@ def test_hash_agg_with_nan_keys(data_gen): @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', [_grpkey_structs_with_non_nested_children, _grpkey_nested_structs], ids=idfn) -def test_hash_agg_with_struct_keys(data_gen): - local_conf = copy_and_update(_float_conf, {'spark.sql.legacy.allowParameterlessCount': 'true'}) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_agg_with_struct_keys(data_gen, kudo_enabled): + local_conf = copy_and_update(_float_conf, {'spark.sql.legacy.allowParameterlessCount': 'true', + kudo_enabled_conf_key: kudo_enabled}) assert_gpu_and_cpu_are_equal_sql( lambda spark : gen_df(spark, data_gen, length=1024), "hash_agg_table", @@ -1267,8 +1362,10 @@ def test_hash_agg_with_struct_keys(data_gen): 'Cast', 'Literal', 'Alias', 'AggregateExpression', 'ShuffleExchangeExec', 'HashPartitioning') @pytest.mark.parametrize('data_gen', [_grpkey_nested_structs_with_array_child], ids=idfn) -def test_hash_agg_with_struct_of_array_fallback(data_gen): - local_conf = copy_and_update(_float_conf, {'spark.sql.legacy.allowParameterlessCount': 'true'}) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_agg_with_struct_of_array_fallback(data_gen, kudo_enabled): + local_conf = copy_and_update(_float_conf, {'spark.sql.legacy.allowParameterlessCount': 'true', + kudo_enabled_conf_key: kudo_enabled}) assert_cpu_and_gpu_are_equal_sql_with_capture( lambda spark : gen_df(spark, data_gen, length=100), 'select a, ' @@ -1290,12 +1387,13 @@ def test_hash_agg_with_struct_of_array_fallback(data_gen): @ignore_order @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', [ _grpkey_floats_with_nulls_and_nans ], ids=idfn) -def test_count_distinct_with_nan_floats(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_count_distinct_with_nan_floats(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_sql( lambda spark : gen_df(spark, data_gen, length=1024), "hash_agg_table", 'select a, count(distinct b) as count_distinct_bees from hash_agg_table group by a', - _float_conf) + copy_and_update(_float_conf, {kudo_enabled_conf_key: kudo_enabled})) # TODO: Literal tests @@ -1304,27 +1402,33 @@ def test_count_distinct_with_nan_floats(data_gen): _nested_gens = array_gens_sample + struct_gens_sample + map_gens_sample + [binary_gen] @pytest.mark.parametrize('data_gen', decimal_gens, ids=idfn) -def test_first_last_reductions_decimal_types(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_first_last_reductions_decimal_types(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( # Coalesce and sort are to make sure that first and last, which are non-deterministic # become deterministic lambda spark: unary_op_df(spark, data_gen).coalesce(1).selectExpr( - 'first(a)', 'last(a)', 'first(a, true)', 'last(a, true)')) + 'first(a)', 'last(a)', 
'first(a, true)', 'last(a, true)'), + conf = {kudo_enabled_conf_key: kudo_enabled}) @pytest.mark.parametrize('data_gen', _nested_gens, ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_first_last_reductions_nested_types(data_gen): +def test_first_last_reductions_nested_types(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( # Coalesce and sort are to make sure that first and last, which are non-deterministic # become deterministic lambda spark: unary_op_df(spark, data_gen).coalesce(1).selectExpr( - 'first(a)', 'last(a)', 'first(a, true)', 'last(a, true)')) + 'first(a)', 'last(a)', 'first(a, true)', 'last(a, true)'), + conf = {kudo_enabled_conf_key: kudo_enabled}) @pytest.mark.parametrize('data_gen', _all_basic_gens_with_all_nans_cases, ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @allow_non_gpu(*non_utc_allow) -def test_generic_reductions(data_gen): - local_conf = copy_and_update(_float_conf, {'spark.sql.legacy.allowParameterlessCount': 'true'}) +def test_generic_reductions(data_gen, kudo_enabled): + local_conf = copy_and_update(_float_conf, {'spark.sql.legacy.allowParameterlessCount': 'true', + kudo_enabled_conf_key: kudo_enabled}) assert_gpu_and_cpu_are_equal_collect( # Coalesce and sort are to make sure that first and last, which are non-deterministic # become deterministic @@ -1342,43 +1446,50 @@ def test_generic_reductions(data_gen): # min_by and max_by are supported for pyspark since 3.3.0 so tested with sql @ignore_order(local=True) @pytest.mark.parametrize('data_gen', all_basic_gens + nested_gens_sample, ids=idfn) -def test_hash_groupby_min_max_by_unique(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_groupby_min_max_by_unique(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_sql( lambda spark: three_col_df(spark, byte_gen, data_gen, UniqueLongGen()), "tbl", - "SELECT a, min_by(b, c), max_by(b, c) FROM tbl GROUP BY a") + "SELECT a, min_by(b, c), max_by(b, c) FROM tbl GROUP BY a", + conf = {kudo_enabled_conf_key: kudo_enabled}) # When the ordering column is not unique this gpu will always return the minimal/maximal value # while spark's result is non-deterministic. So we need to set the column b and c to be # the same to make the result comparable. 
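# A hedged, stand-alone sketch (not one of the tests here) of the tie problem the
# comment above describes: when the ordering column has duplicates, Spark may legally
# return either candidate row from min_by/max_by, so CPU and GPU results are only
# comparable when the ordering key is unique or when b and c are the same column.
# The helper name below is hypothetical and is for illustration only.
def _min_by_tie_sketch(spark):
    df = spark.createDataFrame([(1, 10, 0), (1, 20, 0)], ["a", "b", "c"])
    df.createOrReplaceTempView("tie_tbl")
    # Both rows share c == 0, so either 10 or 20 is a valid min_by(b, c) result.
    return spark.sql("SELECT a, min_by(b, c) FROM tie_tbl GROUP BY a")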
@ignore_order(local=True) @pytest.mark.parametrize('data_gen', basic_gen_no_floats + struct_gens_sample_with_decimal128 + array_gens_sample, ids=idfn) -def test_hash_groupby_min_max_by_same(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_groupby_min_max_by_same(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_sql( lambda spark: two_col_df(spark, byte_gen, data_gen), "tbl", - "SELECT a, min_by(b, b), max_by(b, b) FROM tbl GROUP BY a") + "SELECT a, min_by(b, b), max_by(b, b) FROM tbl GROUP BY a", + conf = {kudo_enabled_conf_key: kudo_enabled}) -def test_reduction_with_min_max_by_unique(): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_reduction_with_min_max_by_unique(kudo_enabled): assert_gpu_and_cpu_are_equal_collect( lambda spark: two_col_df(spark, int_gen, UniqueLongGen()).selectExpr( - "min_by(a, b)", "max_by(a, b)") - ) + "min_by(a, b)", "max_by(a, b)"), + conf = {kudo_enabled_conf_key: kudo_enabled}) # When the ordering column is not unique this gpu will always return the minimal/maximal value # while spark's result is non-deterministic. So we need to set the column b and c to be # the same to make the result comparable. @pytest.mark.parametrize('data_gen', basic_gen_no_floats + struct_gens_sample_with_decimal128 + array_gens_sample, ids=idfn) -def test_reduction_with_max_by_same(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_reduction_with_max_by_same(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( lambda spark: unary_op_df(spark, data_gen).selectExpr( - "min_by(a, a)", "max_by(a, a)") - ) + "min_by(a, a)", "max_by(a, a)"), + conf = {kudo_enabled_conf_key: kudo_enabled}) @pytest.mark.parametrize('data_gen', all_gen + _nested_gens, ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @allow_non_gpu(*non_utc_allow) -def test_count(data_gen): +def test_count(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, data_gen) \ .selectExpr( @@ -1386,42 +1497,49 @@ def test_count(data_gen): 'count()', 'count()', 'count(1)'), - conf = {'spark.sql.legacy.allowParameterlessCount': 'true'}) + conf = {'spark.sql.legacy.allowParameterlessCount': 'true', + kudo_enabled_conf_key: kudo_enabled}) @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', all_basic_gens, ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_distinct_count_reductions(data_gen): +def test_distinct_count_reductions(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( lambda spark : binary_op_df(spark, data_gen).selectExpr( - 'count(DISTINCT a)')) + 'count(DISTINCT a)'), + conf= {kudo_enabled_conf_key: kudo_enabled}) @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', [float_gen, double_gen], ids=idfn) -def test_distinct_float_count_reductions(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_distinct_float_count_reductions(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( lambda spark : binary_op_df(spark, data_gen).selectExpr( - 'count(DISTINCT a)')) + 'count(DISTINCT a)'), + conf = {kudo_enabled_conf_key: kudo_enabled}) @approximate_float @disable_ansi_mode # 
https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', numeric_gens + [decimal_gen_64bit, decimal_gen_128bit], ids=idfn) -def test_arithmetic_reductions(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_arithmetic_reductions(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, data_gen).selectExpr( 'sum(a)', 'avg(a)'), - conf = _float_conf) + conf = copy_and_update(_float_conf, {kudo_enabled_conf_key: kudo_enabled})) @pytest.mark.parametrize('data_gen', all_basic_gens + decimal_gens + _nested_gens, ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_collect_list_reductions(data_gen): +def test_collect_list_reductions(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( # coalescing because collect_list is not deterministic lambda spark: unary_op_df(spark, data_gen).coalesce(1).selectExpr('collect_list(a)'), - conf=_float_conf) + conf= copy_and_update(_float_conf, {kudo_enabled_conf_key: kudo_enabled}) ) _no_neg_zero_all_basic_gens = [byte_gen, short_gen, int_gen, long_gen, # -0.0 cannot work because of -0.0 == 0.0 in cudf for distinct and @@ -1435,11 +1553,12 @@ def test_collect_list_reductions(data_gen): @pytest.mark.parametrize('data_gen', _no_neg_zero_all_basic_gens + decimal_gens + _struct_only_nested_gens, ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_collect_set_reductions(data_gen): +def test_collect_set_reductions(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( lambda spark: unary_op_df(spark, data_gen).selectExpr('sort_array(collect_set(a))'), - conf=_float_conf) + conf=copy_and_update(_float_conf, {kudo_enabled_conf_key: kudo_enabled})) def test_collect_empty(): assert_gpu_and_cpu_are_equal_collect( @@ -1449,8 +1568,9 @@ def test_collect_empty(): @ignore_order(local=True) @pytest.mark.parametrize('data_gen', all_gen + _nested_gens, ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_groupby_first_last(data_gen): +def test_groupby_first_last(data_gen, kudo_enabled): gen_fn = [('a', RepeatSeqGen(LongGen(), length=20)), ('b', data_gen)] agg_fn = lambda df: df.groupBy('a').agg( f.first('b'), f.last('b'), f.first('b', True), f.last('b', True)) @@ -1459,12 +1579,14 @@ def test_groupby_first_last(data_gen): # We set parallelism 1 to prevent nondeterministic results because of distributed setup. 
lambda spark: agg_fn(gen_df(spark, gen_fn, num_slices=1)), # Disable RADIX sort as the CPU sort is not stable if it is - conf={'spark.sql.sort.enableRadixSort': False}) + conf={'spark.sql.sort.enableRadixSort': False, + kudo_enabled_conf_key: kudo_enabled}) @ignore_order(local=True) @pytest.mark.parametrize('data_gen', all_gen + _struct_only_nested_gens, ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_sorted_groupby_first_last(data_gen): +def test_sorted_groupby_first_last(data_gen, kudo_enabled): gen_fn = [('a', RepeatSeqGen(LongGen(), length=20)), ('b', data_gen)] # sort by more than the group by columns to be sure that first/last don't remove the ordering agg_fn = lambda df: df.orderBy('a', 'b').groupBy('a').agg( @@ -1474,7 +1596,8 @@ def test_sorted_groupby_first_last(data_gen): # We set parallelism and partitions to 1 to prevent nondeterministic results because # of distributed setups. lambda spark: agg_fn(gen_df(spark, gen_fn, num_slices=1)), - conf = {'spark.sql.shuffle.partitions': '1'}) + conf = {'spark.sql.shuffle.partitions': '1', + kudo_enabled_conf_key: kudo_enabled}) # Spark has a sorting bug with decimals, see https://issues.apache.org/jira/browse/SPARK-40129. # Have pytest do the sorting rather than Spark as a workaround. @@ -1482,11 +1605,13 @@ def test_sorted_groupby_first_last(data_gen): @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', all_gen, ids=idfn) @pytest.mark.parametrize('count_func', [f.count, f.countDistinct]) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_agg_count(data_gen, count_func): +def test_agg_count(data_gen, count_func, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( lambda spark : gen_df(spark, [('a', data_gen), ('b', data_gen)], - length=1024).groupBy('a').agg(count_func("b"))) + length=1024).groupBy('a').agg(count_func("b")), + conf = {kudo_enabled_conf_key: kudo_enabled}) # Spark has a sorting bug with decimals, see https://issues.apache.org/jira/browse/SPARK-40129. # Have pytest do the sorting rather than Spark as a workaround. 
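The hunks in this file all follow the same mechanical pattern: each aggregation test gains a kudo_enabled parameter, and the kudo shuffle-serializer flag is merged into whatever conf the test already passed. The following is a minimal sketch of that shape only, assuming the integration-test helpers (copy_and_update, assert_gpu_and_cpu_are_equal_collect, unary_op_df, idfn, long_gen, _float_conf) are in scope from the test framework; the test name is hypothetical and not part of this patch.

import pytest

# Same key the patch introduces in join_test.py; the real test modules define it once.
kudo_enabled_conf_key = "spark.rapids.shuffle.kudo.serializer.enabled"

@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn)
def test_kudo_toggle_sketch(kudo_enabled):
    # Merge the kudo flag into the existing conf instead of replacing it.
    conf = copy_and_update(_float_conf, {kudo_enabled_conf_key: kudo_enabled})
    assert_gpu_and_cpu_are_equal_collect(
        lambda spark: unary_op_df(spark, long_gen).selectExpr('sum(a)', 'count(a)'),
        conf=conf)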
@@ -1497,11 +1622,13 @@ def test_agg_count(data_gen, count_func): [ArrayGen(StructGen([['child0', byte_gen], ['child1', string_gen], ['child2', float_gen]])) , binary_gen], ids=idfn) @pytest.mark.parametrize('count_func', [f.count, f.countDistinct]) -def test_groupby_list_types_fallback(data_gen, count_func): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_groupby_list_types_fallback(data_gen, count_func, kudo_enabled): assert_gpu_fallback_collect( lambda spark : gen_df(spark, [('a', data_gen), ('b', data_gen)], length=1024).groupBy('a').agg(count_func("b")), - "HashAggregateExec") + "HashAggregateExec", + conf = {kudo_enabled_conf_key: kudo_enabled}) def subquery_create_temp_views(spark, expr): t1 = "select * from values (1,2) as t1(a,b)" @@ -1525,10 +1652,12 @@ def subquery_create_temp_views(spark, expr): "select sum(distinct(if(c > (select sum(distinct(a)) from t1), d, 0))) as csum " + "from t2 group by c" ]) -def test_subquery_in_agg(adaptive, expr): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_subquery_in_agg(adaptive, expr, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( lambda spark: subquery_create_temp_views(spark, expr), - conf = {"spark.sql.adaptive.enabled" : adaptive}) + conf = {"spark.sql.adaptive.enabled" : adaptive, + kudo_enabled_conf_key: kudo_enabled}) # TODO support multi-level structs https://github.com/NVIDIA/spark-rapids/issues/2438 @@ -1558,12 +1687,13 @@ def workaround_dedupe_by_value(df, num_cols): ], nullable=False), ], ids=idfn) @ignore_order(local=True) -def test_struct_groupby_count(key_data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_struct_groupby_count(key_data_gen, kudo_enabled): def group_by_count(spark): df = two_col_df(spark, key_data_gen, IntegerGen()) assert_single_level_struct(df) return workaround_dedupe_by_value(df.groupBy(df.a).count(), 3) - assert_gpu_and_cpu_are_equal_collect(group_by_count) + assert_gpu_and_cpu_are_equal_collect(group_by_count, conf = {kudo_enabled_conf_key: kudo_enabled}) @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @@ -1578,13 +1708,15 @@ def group_by_count(spark): ], nullable=False) ], ids=idfn) @ignore_order(local=True) -def test_struct_cast_groupby_count(cast_struct_tostring, key_data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_struct_cast_groupby_count(cast_struct_tostring, key_data_gen, kudo_enabled): def _group_by_struct_or_cast(spark): df = two_col_df(spark, key_data_gen, IntegerGen()) assert_single_level_struct(df) return df.groupBy(df.a.cast(StringType())).count() assert_gpu_and_cpu_are_equal_collect(_group_by_struct_or_cast, { - 'spark.sql.legacy.castComplexTypesToString.enabled': cast_struct_tostring == 'LEGACY' + 'spark.sql.legacy.castComplexTypesToString.enabled': cast_struct_tostring == 'LEGACY', + kudo_enabled_conf_key: kudo_enabled }) @@ -1601,12 +1733,13 @@ def _group_by_struct_or_cast(spark): ]))], nullable=False), ], ids=idfn) @ignore_order(local=True) -def test_struct_count_distinct(key_data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_struct_count_distinct(key_data_gen, kudo_enabled): def _count_distinct_by_struct(spark): df = gen_df(spark, key_data_gen) assert_single_level_struct(df) return df.agg(f.countDistinct(df.a)) - assert_gpu_and_cpu_are_equal_collect(_count_distinct_by_struct) + assert_gpu_and_cpu_are_equal_collect(_count_distinct_by_struct, conf = {kudo_enabled_conf_key: 
kudo_enabled}) @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @@ -1623,96 +1756,112 @@ def _count_distinct_by_struct(spark): ]))], nullable=False), ], ids=idfn) @ignore_order(local=True) -def test_struct_count_distinct_cast(cast_struct_tostring, key_data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_struct_count_distinct_cast(cast_struct_tostring, key_data_gen, kudo_enabled): def _count_distinct_by_struct(spark): df = gen_df(spark, key_data_gen) assert_single_level_struct(df) return df.agg(f.countDistinct(df.a.cast(StringType()))) assert_gpu_and_cpu_are_equal_collect(_count_distinct_by_struct, { - 'spark.sql.legacy.castComplexTypesToString.enabled': cast_struct_tostring == 'LEGACY' + 'spark.sql.legacy.castComplexTypesToString.enabled': cast_struct_tostring == 'LEGACY', + kudo_enabled_conf_key: kudo_enabled }) @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @ignore_order(local=True) -def test_reduction_nested_struct(): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_reduction_nested_struct(kudo_enabled): def do_it(spark): df = unary_op_df(spark, StructGen([('aa', StructGen([('aaa', IntegerGen(min_val=0, max_val=4))]))])) return df.agg(f.sum(df.a.aa.aaa)) - assert_gpu_and_cpu_are_equal_collect(do_it) + assert_gpu_and_cpu_are_equal_collect(do_it, conf = {kudo_enabled_conf_key: kudo_enabled}) @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @ignore_order(local=True) -def test_reduction_nested_array(): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_reduction_nested_array(kudo_enabled): def do_it(spark): df = unary_op_df(spark, ArrayGen(StructGen([('aa', IntegerGen(min_val=0, max_val=4))]))) return df.agg(f.sum(df.a[1].aa)) - assert_gpu_and_cpu_are_equal_collect(do_it) + assert_gpu_and_cpu_are_equal_collect(do_it, conf = {kudo_enabled_conf_key: kudo_enabled}) # The map here is a child not a top level, because we only support GetMapValue on String to String maps. 
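# A hedged, stand-alone sketch (not one of the tests here) of the shape the comment
# above describes: column a is an array<map<string,string>>, and df.a[1]["a"] is the
# access that lowers to GetMapValue once the array element is extracted. The helper
# name is hypothetical and is for illustration only.
from pyspark.sql.types import ArrayType, MapType, StringType, StructField, StructType

def _nested_map_access_sketch(spark):
    schema = StructType([StructField("a", ArrayType(MapType(StringType(), StringType())))])
    # One row holding an array of string-to-string maps.
    df = spark.createDataFrame([([{"a": "x"}, {"a": "y"}],)], schema)
    # a[1]['a'] picks the second map (0-based) and reads key 'a'; min() then aggregates it.
    return df.selectExpr("min(a[1]['a'])")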
@disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @ignore_order(local=True) -def test_reduction_nested_map(): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_reduction_nested_map(kudo_enabled): def do_it(spark): df = unary_op_df(spark, ArrayGen(MapGen(StringGen('a{1,5}', nullable=False), StringGen('[ab]{1,5}')))) return df.agg(f.min(df.a[1]["a"])) - assert_gpu_and_cpu_are_equal_collect(do_it) + assert_gpu_and_cpu_are_equal_collect(do_it, conf = {kudo_enabled_conf_key: kudo_enabled}) @ignore_order(local=True) @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 -def test_agg_nested_struct(): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_agg_nested_struct(kudo_enabled): def do_it(spark): df = two_col_df(spark, StringGen('k{1,5}'), StructGen([('aa', StructGen([('aaa', IntegerGen(min_val=0, max_val=4))]))])) return df.groupBy('a').agg(f.sum(df.b.aa.aaa)) - assert_gpu_and_cpu_are_equal_collect(do_it) + assert_gpu_and_cpu_are_equal_collect(do_it, conf = {kudo_enabled_conf_key: kudo_enabled}) @ignore_order(local=True) @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 -def test_agg_nested_array(): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_agg_nested_array(kudo_enabled): def do_it(spark): df = two_col_df(spark, StringGen('k{1,5}'), ArrayGen(StructGen([('aa', IntegerGen(min_val=0, max_val=4))]))) return df.groupBy('a').agg(f.sum(df.b[1].aa)) - assert_gpu_and_cpu_are_equal_collect(do_it) + assert_gpu_and_cpu_are_equal_collect(do_it, conf = {kudo_enabled_conf_key: kudo_enabled}) # The map here is a child not a top level, because we only support GetMapValue on String to String maps. @ignore_order(local=True) @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 -def test_agg_nested_map(): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_agg_nested_map(kudo_enabled): def do_it(spark): df = two_col_df(spark, StringGen('k{1,5}'), ArrayGen(MapGen(StringGen('a{1,5}', nullable=False), StringGen('[ab]{1,5}')))) return df.groupBy('a').agg(f.min(df.b[1]["a"])) - assert_gpu_and_cpu_are_equal_collect(do_it) + assert_gpu_and_cpu_are_equal_collect(do_it, conf = {kudo_enabled_conf_key: kudo_enabled}) @incompat @pytest.mark.parametrize('aqe_enabled', ['false', 'true'], ids=idfn) -def test_hash_groupby_approx_percentile_reduction(aqe_enabled): - conf = {'spark.sql.adaptive.enabled': aqe_enabled} +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_groupby_approx_percentile_reduction(aqe_enabled, kudo_enabled): + conf = {'spark.sql.adaptive.enabled': aqe_enabled, + kudo_enabled_conf_key: kudo_enabled} compare_percentile_approx( lambda spark: gen_df(spark, [('v', DoubleGen())], length=100), [0.05, 0.25, 0.5, 0.75, 0.95], conf, reduction = True) @incompat @pytest.mark.parametrize('aqe_enabled', ['false', 'true'], ids=idfn) -def test_hash_groupby_approx_percentile_reduction_single_row(aqe_enabled): - conf = {'spark.sql.adaptive.enabled': aqe_enabled} +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_groupby_approx_percentile_reduction_single_row(aqe_enabled, kudo_enabled): + conf = {'spark.sql.adaptive.enabled': aqe_enabled, + kudo_enabled_conf_key: kudo_enabled} compare_percentile_approx( lambda spark: gen_df(spark, [('v', DoubleGen())], length=1), [0.05, 0.25, 0.5, 0.75, 0.95], conf, reduction = True) @incompat 
@pytest.mark.parametrize('aqe_enabled', ['false', 'true'], ids=idfn) -def test_hash_groupby_approx_percentile_reduction_no_rows(aqe_enabled): - conf = {'spark.sql.adaptive.enabled': aqe_enabled} +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_groupby_approx_percentile_reduction_no_rows(aqe_enabled, kudo_enabled): + conf = {'spark.sql.adaptive.enabled': aqe_enabled, + kudo_enabled_conf_key: kudo_enabled} compare_percentile_approx( lambda spark: gen_df(spark, [('v', DoubleGen())], length=0), [0.05, 0.25, 0.5, 0.75, 0.95], conf, reduction = True) @incompat @pytest.mark.parametrize('aqe_enabled', ['false', 'true'], ids=idfn) -def test_hash_groupby_approx_percentile_byte(aqe_enabled): - conf = {'spark.sql.adaptive.enabled': aqe_enabled} +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_groupby_approx_percentile_byte(aqe_enabled, kudo_enabled): + conf = {'spark.sql.adaptive.enabled': aqe_enabled, + kudo_enabled_conf_key: kudo_enabled} compare_percentile_approx( lambda spark: gen_df(spark, [('k', StringGen(nullable=False)), ('v', ByteGen())], length=100), @@ -1721,8 +1870,10 @@ def test_hash_groupby_approx_percentile_byte(aqe_enabled): @incompat @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/11198 @pytest.mark.parametrize('aqe_enabled', ['false', 'true'], ids=idfn) -def test_hash_groupby_approx_percentile_byte_scalar(aqe_enabled): - conf = {'spark.sql.adaptive.enabled': aqe_enabled} +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_groupby_approx_percentile_byte_scalar(aqe_enabled, kudo_enabled): + conf = {'spark.sql.adaptive.enabled': aqe_enabled, + kudo_enabled_conf_key: kudo_enabled} compare_percentile_approx( lambda spark: gen_df(spark, [('k', StringGen(nullable=False)), ('v', ByteGen())], length=100), @@ -1730,8 +1881,10 @@ def test_hash_groupby_approx_percentile_byte_scalar(aqe_enabled): @incompat @pytest.mark.parametrize('aqe_enabled', ['false', 'true'], ids=idfn) -def test_hash_groupby_approx_percentile_long_repeated_keys(aqe_enabled): - conf = {'spark.sql.adaptive.enabled': aqe_enabled} +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_groupby_approx_percentile_long_repeated_keys(aqe_enabled, kudo_enabled): + conf = {'spark.sql.adaptive.enabled': aqe_enabled, + kudo_enabled_conf_key: kudo_enabled} compare_percentile_approx( lambda spark: gen_df(spark, [('k', RepeatSeqGen(LongGen(), length=20)), ('v', UniqueLongGen())], length=100), @@ -1739,8 +1892,10 @@ def test_hash_groupby_approx_percentile_long_repeated_keys(aqe_enabled): @incompat @pytest.mark.parametrize('aqe_enabled', ['false', 'true'], ids=idfn) -def test_hash_groupby_approx_percentile_long(aqe_enabled): - conf = {'spark.sql.adaptive.enabled': aqe_enabled} +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_groupby_approx_percentile_long(aqe_enabled, kudo_enabled): + conf = {'spark.sql.adaptive.enabled': aqe_enabled, + kudo_enabled_conf_key: kudo_enabled} compare_percentile_approx( lambda spark: gen_df(spark, [('k', StringGen(nullable=False)), ('v', UniqueLongGen())], length=100), @@ -1749,8 +1904,10 @@ def test_hash_groupby_approx_percentile_long(aqe_enabled): @incompat @disable_ansi_mode # ANSI mode is tested in test_hash_groupby_approx_percentile_long_single_ansi @pytest.mark.parametrize('aqe_enabled', ['false', 'true'], ids=idfn) -def test_hash_groupby_approx_percentile_long_single(aqe_enabled): - conf = 
{'spark.sql.adaptive.enabled': aqe_enabled} +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_groupby_approx_percentile_long_single(aqe_enabled, kudo_enabled): + conf = {'spark.sql.adaptive.enabled': aqe_enabled, + kudo_enabled_conf_key: kudo_enabled} compare_percentile_approx( lambda spark: gen_df(spark, [('k', StringGen(nullable=False)), ('v', UniqueLongGen())], length=100), @@ -1760,13 +1917,15 @@ def test_hash_groupby_approx_percentile_long_single(aqe_enabled): @incompat @pytest.mark.parametrize('aqe_enabled', ['false', 'true'], ids=idfn) @allow_non_gpu('ObjectHashAggregateExec', 'ShuffleExchangeExec') -def test_hash_groupby_approx_percentile_long_single_ansi(aqe_enabled): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_groupby_approx_percentile_long_single_ansi(aqe_enabled, kudo_enabled): """ Tests approx_percentile with ANSI mode enabled. Note: In ANSI mode, the test query exercises ObjectHashAggregateExec and ShuffleExchangeExec, which fall back to CPU. """ - conf = {'spark.sql.adaptive.enabled': aqe_enabled} + conf = {'spark.sql.adaptive.enabled': aqe_enabled, + kudo_enabled_conf_key: kudo_enabled} conf.update(ansi_enabled_conf) compare_percentile_approx( lambda spark: gen_df(spark, [('k', StringGen(nullable=False)), @@ -1776,8 +1935,10 @@ def test_hash_groupby_approx_percentile_long_single_ansi(aqe_enabled): @incompat @pytest.mark.parametrize('aqe_enabled', ['false', 'true'], ids=idfn) -def test_hash_groupby_approx_percentile_double(aqe_enabled): - conf = {'spark.sql.adaptive.enabled': aqe_enabled} +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_groupby_approx_percentile_double(aqe_enabled, kudo_enabled): + conf = {'spark.sql.adaptive.enabled': aqe_enabled, + kudo_enabled_conf_key: kudo_enabled} compare_percentile_approx( lambda spark: gen_df(spark, [('k', StringGen(nullable=False)), ('v', DoubleGen())], length=100), @@ -1785,8 +1946,10 @@ def test_hash_groupby_approx_percentile_double(aqe_enabled): @incompat @pytest.mark.parametrize('aqe_enabled', ['false', 'true'], ids=idfn) -def test_hash_groupby_approx_percentile_double_single(aqe_enabled): - conf = {'spark.sql.adaptive.enabled': aqe_enabled} +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_groupby_approx_percentile_double_single(aqe_enabled, kudo_enabled): + conf = {'spark.sql.adaptive.enabled': aqe_enabled, + kudo_enabled_conf_key: kudo_enabled} compare_percentile_approx( lambda spark: gen_df(spark, [('k', StringGen(nullable=False)), ('v', DoubleGen())], length=100), @@ -1794,13 +1957,15 @@ def test_hash_groupby_approx_percentile_double_single(aqe_enabled): @incompat @pytest.mark.parametrize('aqe_enabled', ['false', 'true'], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @ignore_order(local=True) @allow_non_gpu('TakeOrderedAndProjectExec', 'Alias', 'Cast', 'ObjectHashAggregateExec', 'AggregateExpression', 'ApproximatePercentile', 'Literal', 'ShuffleExchangeExec', 'HashPartitioning', 'CollectLimitExec') -def test_hash_groupby_approx_percentile_partial_fallback_to_cpu(aqe_enabled): +def test_hash_groupby_approx_percentile_partial_fallback_to_cpu(aqe_enabled, kudo_enabled): conf = { 'spark.rapids.sql.hashAgg.replaceMode': 'partial', - 'spark.sql.adaptive.enabled': aqe_enabled + 'spark.sql.adaptive.enabled': aqe_enabled, + kudo_enabled_conf_key: kudo_enabled } def approx_percentile_query(spark): @@ -1813,66 +1978,80 @@ def 
approx_percentile_query(spark): @incompat @ignore_order(local=True) -def test_hash_groupby_approx_percentile_decimal32(): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_groupby_approx_percentile_decimal32(kudo_enabled): compare_percentile_approx( lambda spark: gen_df(spark, [('k', RepeatSeqGen(ByteGen(nullable=False), length=2)), ('v', DecimalGen(6, 2))]), - [0.05, 0.25, 0.5, 0.75, 0.95]) + [0.05, 0.25, 0.5, 0.75, 0.95], + conf = {kudo_enabled_conf_key: kudo_enabled}) @incompat @ignore_order(local=True) @disable_ansi_mode # ANSI mode is tested with test_hash_groupby_approx_percentile_decimal_single_ansi. -def test_hash_groupby_approx_percentile_decimal32_single(): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_groupby_approx_percentile_decimal32_single(kudo_enabled): compare_percentile_approx( lambda spark: gen_df(spark, [('k', RepeatSeqGen(ByteGen(nullable=False), length=2)), ('v', DecimalGen(6, 2))]), - 0.05) + 0.05, + conf = {kudo_enabled_conf_key: kudo_enabled}) @incompat @ignore_order(local=True) @allow_non_gpu('ObjectHashAggregateExec', 'ShuffleExchangeExec') -def test_hash_groupby_approx_percentile_decimal_single_ansi(): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_groupby_approx_percentile_decimal_single_ansi(kudo_enabled): compare_percentile_approx( lambda spark: gen_df(spark, [('k', RepeatSeqGen(ByteGen(nullable=False), length=2)), ('v', DecimalGen(6, 2))]), - 0.05, conf=ansi_enabled_conf) + 0.05, + conf=copy_and_update(ansi_enabled_conf, {kudo_enabled_conf_key: kudo_enabled})) @incompat @ignore_order(local=True) -def test_hash_groupby_approx_percentile_decimal64(): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_groupby_approx_percentile_decimal64(kudo_enabled): compare_percentile_approx( lambda spark: gen_df(spark, [('k', RepeatSeqGen(ByteGen(nullable=False), length=2)), ('v', DecimalGen(10, 9))]), - [0.05, 0.25, 0.5, 0.75, 0.95]) + [0.05, 0.25, 0.5, 0.75, 0.95], + conf = {kudo_enabled_conf_key: kudo_enabled}) @incompat @disable_ansi_mode # ANSI mode is tested with test_hash_groupby_approx_percentile_decimal_single_ansi. @ignore_order(local=True) -def test_hash_groupby_approx_percentile_decimal64_single(): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_groupby_approx_percentile_decimal64_single(kudo_enabled): compare_percentile_approx( lambda spark: gen_df(spark, [('k', RepeatSeqGen(ByteGen(nullable=False), length=2)), ('v', DecimalGen(10, 9))]), - 0.05) + 0.05, + conf = {kudo_enabled_conf_key: kudo_enabled}) @incompat @ignore_order(local=True) -def test_hash_groupby_approx_percentile_decimal128(): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_groupby_approx_percentile_decimal128(kudo_enabled): compare_percentile_approx( lambda spark: gen_df(spark, [('k', RepeatSeqGen(ByteGen(nullable=False), length=2)), ('v', DecimalGen(19, 18))]), - [0.05, 0.25, 0.5, 0.75, 0.95]) + [0.05, 0.25, 0.5, 0.75, 0.95], + conf = {kudo_enabled_conf_key: kudo_enabled}) @incompat @disable_ansi_mode # ANSI mode is tested with test_hash_groupby_approx_percentile_decimal_single_ansi. 
@ignore_order(local=True) -def test_hash_groupby_approx_percentile_decimal128_single(): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_groupby_approx_percentile_decimal128_single(kudo_enabled): compare_percentile_approx( lambda spark: gen_df(spark, [('k', RepeatSeqGen(ByteGen(nullable=False), length=2)), ('v', DecimalGen(19, 18))]), - 0.05) + 0.05, + conf = {kudo_enabled_conf_key: kudo_enabled}) # The percentile approx tests differ from other tests because we do not expect the CPU and GPU to produce the same # results due to the different algorithms being used. Instead we compute an exact percentile on the CPU and then @@ -1967,20 +2146,22 @@ def create_percentile_sql(func_name, percentiles, reduction): @disable_ansi_mode # ANSI mode is tested in test_hash_grpby_avg_nulls_ansi @pytest.mark.parametrize('data_gen', [_grpkey_strings_with_extra_nulls], ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) -def test_hash_grpby_avg_nulls(data_gen, conf): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_grpby_avg_nulls(data_gen, conf, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( lambda spark: gen_df(spark, data_gen, length=100).groupby('a') .agg(f.avg('c')), - conf=conf - ) + conf=copy_and_update(conf, {kudo_enabled_conf_key: kudo_enabled})) @ignore_order @allow_non_gpu('HashAggregateExec', 'Alias', 'AggregateExpression', 'Cast', 'HashPartitioning', 'ShuffleExchangeExec', 'Average') @pytest.mark.parametrize('data_gen', [_grpkey_strings_with_extra_nulls], ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) -def test_hash_grpby_avg_nulls_ansi(data_gen, conf): - local_conf = copy_and_update(conf, {'spark.sql.ansi.enabled': 'true'}) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_grpby_avg_nulls_ansi(data_gen, conf, kudo_enabled): + local_conf = copy_and_update(conf, {'spark.sql.ansi.enabled': 'true', + kudo_enabled_conf_key: kudo_enabled}) assert_gpu_fallback_collect( lambda spark: gen_df(spark, data_gen, length=100).groupby('a') .agg(f.avg('c')), @@ -1992,20 +2173,22 @@ def test_hash_grpby_avg_nulls_ansi(data_gen, conf): @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('data_gen', [_grpkey_strings_with_extra_nulls], ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) -def test_hash_reduction_avg_nulls(data_gen, conf): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_reduction_avg_nulls(data_gen, conf, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( lambda spark: gen_df(spark, data_gen, length=100) .agg(f.avg('c')), - conf=conf - ) + conf=copy_and_update(conf, {kudo_enabled_conf_key: kudo_enabled})) @ignore_order @allow_non_gpu('HashAggregateExec', 'Alias', 'AggregateExpression', 'Cast', 'HashPartitioning', 'ShuffleExchangeExec', 'Average') @pytest.mark.parametrize('data_gen', [_grpkey_strings_with_extra_nulls], ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) -def test_hash_reduction_avg_nulls_ansi(data_gen, conf): - local_conf = copy_and_update(conf, {'spark.sql.ansi.enabled': 'true'}) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_reduction_avg_nulls_ansi(data_gen, conf, kudo_enabled): + local_conf = copy_and_update(conf, {'spark.sql.ansi.enabled': 'true', + 
kudo_enabled_conf_key: kudo_enabled}) assert_gpu_fallback_collect( lambda spark: gen_df(spark, data_gen, length=100) .agg(f.avg('c')), @@ -2018,43 +2201,47 @@ def test_hash_reduction_avg_nulls_ansi(data_gen, conf): @allow_non_gpu('HashAggregateExec', 'Alias', 'AggregateExpression', 'Cast', 'HashPartitioning', 'ShuffleExchangeExec', 'Sum') @pytest.mark.parametrize('data_gen', _no_overflow_ansi_gens, ids=idfn) -def test_sum_fallback_when_ansi_enabled(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_sum_fallback_when_ansi_enabled(data_gen, kudo_enabled): def do_it(spark): df = gen_df(spark, [('a', data_gen), ('b', data_gen)], length=100) return df.groupBy('a').agg(f.sum("b")) assert_gpu_fallback_collect(do_it, 'Sum', - conf={'spark.sql.ansi.enabled': 'true'}) + conf={'spark.sql.ansi.enabled': 'true', kudo_enabled_conf_key: kudo_enabled}) @ignore_order(local=True) @allow_non_gpu('HashAggregateExec', 'Alias', 'AggregateExpression', 'Cast', 'HashPartitioning', 'ShuffleExchangeExec', 'Average') @pytest.mark.parametrize('data_gen', _no_overflow_ansi_gens, ids=idfn) -def test_avg_fallback_when_ansi_enabled(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_avg_fallback_when_ansi_enabled(data_gen, kudo_enabled): def do_it(spark): df = gen_df(spark, [('a', data_gen), ('b', data_gen)], length=100) return df.groupBy('a').agg(f.avg("b")) assert_gpu_fallback_collect(do_it, 'Average', - conf={'spark.sql.ansi.enabled': 'true'}) + conf={'spark.sql.ansi.enabled': 'true', kudo_enabled_conf_key: kudo_enabled}) @ignore_order(local=True) @allow_non_gpu('HashAggregateExec', 'Alias', 'AggregateExpression', 'HashPartitioning', 'ShuffleExchangeExec', 'Count', 'Literal') @pytest.mark.parametrize('data_gen', _no_overflow_ansi_gens, ids=idfn) -def test_count_fallback_when_ansi_enabled(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_count_fallback_when_ansi_enabled(data_gen, kudo_enabled): def do_it(spark): df = gen_df(spark, [('a', data_gen), ('b', data_gen)], length=100) return df.groupBy('a').agg(f.count("b"), f.count("*")) assert_gpu_fallback_collect(do_it, 'Count', - conf={'spark.sql.ansi.enabled': 'true'}) + conf={'spark.sql.ansi.enabled': 'true', kudo_enabled_conf_key: kudo_enabled}) @ignore_order(local=True) @pytest.mark.parametrize('data_gen', _no_overflow_ansi_gens, ids=idfn) -def test_no_fallback_when_ansi_enabled(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_no_fallback_when_ansi_enabled(data_gen, kudo_enabled): def do_it(spark): df = gen_df(spark, [('a', data_gen), ('b', data_gen)], length=100) # coalescing because first/last are not deterministic @@ -2062,7 +2249,7 @@ def do_it(spark): return df.groupBy('a').agg(f.first("b"), f.last("b"), f.min("b"), f.max("b")) assert_gpu_and_cpu_are_equal_collect(do_it, - conf={'spark.sql.ansi.enabled': 'true'}) + conf={'spark.sql.ansi.enabled': 'true', kudo_enabled_conf_key: kudo_enabled}) # Tests for standard deviation and variance aggregations. 
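# A hedged, stand-alone sketch (not one of the tests here) of the aggregate family this
# section exercises; the query shape is illustrative only and the helper name is
# hypothetical. The StddevPop/StddevSamp/VariancePop/VarianceSamp expressions named in
# the partial-replace fallback test further down correspond to these SQL functions.
def _std_variance_sketch(spark):
    df = spark.range(1000).selectExpr("id % 7 AS k", "CAST(id AS DOUBLE) AS v")
    df.createOrReplaceTempView("data_table")
    return spark.sql(
        "SELECT k, stddev_pop(v), stddev_samp(v), var_pop(v), var_samp(v) "
        "FROM data_table GROUP BY k")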
@ignore_order(local=True) @@ -2070,9 +2257,11 @@ def do_it(spark): @incompat @pytest.mark.parametrize('data_gen', _init_list_with_decimals, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) -def test_std_variance(data_gen, conf): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_std_variance(data_gen, conf, kudo_enabled): local_conf = copy_and_update(conf, { - 'spark.rapids.sql.castDecimalToFloat.enabled': 'true'}) + 'spark.rapids.sql.castDecimalToFloat.enabled': 'true', + kudo_enabled_conf_key: kudo_enabled}) assert_gpu_and_cpu_are_equal_sql( lambda spark : gen_df(spark, data_gen, length=1000), "data_table", @@ -2101,8 +2290,10 @@ def test_std_variance(data_gen, conf): @pytest.mark.parametrize('data_gen', [_grpkey_strings_with_extra_nulls], ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) @pytest.mark.parametrize('ansi_enabled', ['true', 'false']) -def test_std_variance_nulls(data_gen, conf, ansi_enabled): - local_conf = copy_and_update(conf, {'spark.sql.ansi.enabled': ansi_enabled}) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_std_variance_nulls(data_gen, conf, ansi_enabled, kudo_enabled): + local_conf = copy_and_update(conf, {'spark.sql.ansi.enabled': ansi_enabled, + kudo_enabled_conf_key: kudo_enabled}) assert_gpu_and_cpu_are_equal_sql( lambda spark : gen_df(spark, data_gen, length=1000), "data_table", @@ -2138,13 +2329,16 @@ def test_std_variance_nulls(data_gen, conf, ansi_enabled): @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) @pytest.mark.parametrize('replace_mode', _replace_modes_non_distinct, ids=idfn) @pytest.mark.parametrize('aqe_enabled', ['false', 'true'], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @pytest.mark.xfail(condition=is_databricks104_or_later(), reason='https://github.com/NVIDIA/spark-rapids/issues/4963') def test_std_variance_partial_replace_fallback(data_gen, conf, replace_mode, - aqe_enabled): + aqe_enabled, + kudo_enabled): local_conf = copy_and_update(conf, {'spark.rapids.sql.hashAgg.replaceMode': replace_mode, - 'spark.sql.adaptive.enabled': aqe_enabled}) + 'spark.sql.adaptive.enabled': aqe_enabled, + kudo_enabled_conf_key: kudo_enabled}) exist_clz = ['StddevPop', 'StddevSamp', 'VariancePop', 'VarianceSamp', 'GpuStddevPop', 'GpuStddevSamp', 'GpuVariancePop', 'GpuVarianceSamp'] @@ -2189,8 +2383,9 @@ def test_std_variance_partial_replace_fallback(data_gen, null_gen] + array_gens_sample + struct_gens_sample @ignore_order(local=True) @pytest.mark.parametrize('data_gen', gens_for_max_min, ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_min_max_in_groupby_and_reduction(data_gen): +def test_min_max_in_groupby_and_reduction(data_gen, kudo_enabled): df_gen = [('a', data_gen), ('b', RepeatSeqGen(IntegerGen(), length=20))] # test max @@ -2198,44 +2393,48 @@ def test_min_max_in_groupby_and_reduction(data_gen): lambda spark : gen_df(spark, df_gen), "hash_agg_table", 'select b, max(a) from hash_agg_table group by b', - _float_conf) + copy_and_update(_float_conf, {kudo_enabled_conf_key: kudo_enabled})) assert_gpu_and_cpu_are_equal_sql( lambda spark : gen_df(spark, df_gen), "hash_agg_table", 'select max(a) from hash_agg_table', - _float_conf) + copy_and_update(_float_conf, {kudo_enabled_conf_key: kudo_enabled})) # test min assert_gpu_and_cpu_are_equal_sql( 
lambda spark : gen_df(spark, df_gen, length=1024), "hash_agg_table", 'select b, min(a) from hash_agg_table group by b', - _float_conf) + copy_and_update(_float_conf, {kudo_enabled_conf_key: kudo_enabled})) assert_gpu_and_cpu_are_equal_sql( lambda spark : gen_df(spark, df_gen, length=1024), "hash_agg_table", 'select min(a) from hash_agg_table', - _float_conf) + copy_and_update(_float_conf, {kudo_enabled_conf_key: kudo_enabled})) # Some Spark implementations will optimize this aggregation as a # complete aggregation (i.e.: only one aggregation node in the plan) @ignore_order(local=True) @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 -def test_hash_aggregate_complete_with_grouping_expressions(): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_aggregate_complete_with_grouping_expressions(kudo_enabled): assert_gpu_and_cpu_are_equal_sql( lambda spark : spark.range(10).withColumn("id2", f.col("id")), "hash_agg_complete_table", - "select id, avg(id) from hash_agg_complete_table group by id, id2 + 1") + "select id, avg(id) from hash_agg_complete_table group by id, id2 + 1", + conf = {kudo_enabled_conf_key: kudo_enabled}) @ignore_order(local=True) @disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('cast_key_to', ["byte", "short", "int", "long", "string", "DECIMAL(38,5)"], ids=idfn) -def test_hash_agg_force_pre_sort(cast_key_to): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_agg_force_pre_sort(cast_key_to, kudo_enabled): def do_it(spark): gen = StructGen([("key", UniqueLongGen()), ("value", long_gen)], nullable=False) df = gen_df(spark, gen) return df.selectExpr("CAST((key div 10) as " + cast_key_to + ") as key", "value").groupBy("key").sum("value") assert_gpu_and_cpu_are_equal_collect(do_it, conf={'spark.rapids.sql.agg.forceSinglePassPartialSort': True, - 'spark.rapids.sql.agg.singlePassPartialSortEnabled': True}) + 'spark.rapids.sql.agg.singlePassPartialSortEnabled': True, + kudo_enabled_conf_key: kudo_enabled}) diff --git a/integration_tests/src/main/python/hive_parquet_write_test.py b/integration_tests/src/main/python/hive_parquet_write_test.py index e66b889a986..540db74a1ad 100644 --- a/integration_tests/src/main/python/hive_parquet_write_test.py +++ b/integration_tests/src/main/python/hive_parquet_write_test.py @@ -25,9 +25,10 @@ # "GpuInsertIntoHiveTable" for Parquet write. _write_to_hive_conf = {"spark.sql.hive.convertMetastoreParquet": False} -_hive_bucket_gens = [ - boolean_gen, byte_gen, short_gen, int_gen, long_gen, string_gen, float_gen, double_gen, +_hive_bucket_gens_sans_bools = [ + byte_gen, short_gen, int_gen, long_gen, string_gen, float_gen, double_gen, DateGen(start=date(1590, 1, 1)), _restricted_timestamp()] +_hive_bucket_gens = [boolean_gen] + _hive_bucket_gens_sans_bools _hive_basic_gens = _hive_bucket_gens + [ DecimalGen(precision=19, scale=1, nullable=True), diff --git a/integration_tests/src/main/python/hive_write_test.py b/integration_tests/src/main/python/hive_write_test.py index 945cc4806fb..af825a99810 100644 --- a/integration_tests/src/main/python/hive_write_test.py +++ b/integration_tests/src/main/python/hive_write_test.py @@ -29,8 +29,11 @@ def _restricted_timestamp(nullable=True): end=datetime(2262, 4, 11, tzinfo=timezone.utc), nullable=nullable) +# Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and +# https://github.com/rapidsai/cudf/issues/6763 . 
+# Once the first issue is fixed, add back boolean_gen _basic_gens = [byte_gen, short_gen, int_gen, long_gen, float_gen, double_gen, - string_gen, boolean_gen, DateGen(start=date(1590, 1, 1)), + string_gen, DateGen(start=date(1590, 1, 1)), _restricted_timestamp() ] + decimal_gens @@ -45,8 +48,11 @@ def _restricted_timestamp(nullable=True): ArrayGen(ArrayGen(string_gen, max_length=10), max_length=10), ArrayGen(StructGen([['child0', byte_gen], ['child1', string_gen], ['child2', float_gen]]))] +# Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and +# https://github.com/rapidsai/cudf/issues/6763 . +# Once the first issue is fixed, add back boolean_gen _map_gens = [simple_string_to_string_map_gen] + [MapGen(f(nullable=False), f()) for f in [ - BooleanGen, ByteGen, ShortGen, IntegerGen, LongGen, FloatGen, DoubleGen, + ByteGen, ShortGen, IntegerGen, LongGen, FloatGen, DoubleGen, lambda nullable=True: _restricted_timestamp(nullable=nullable), lambda nullable=True: DateGen(start=date(1590, 1, 1), nullable=nullable), lambda nullable=True: DecimalGen(precision=15, scale=1, nullable=nullable), diff --git a/integration_tests/src/main/python/join_test.py b/integration_tests/src/main/python/join_test.py index 703fbe80230..936310bedeb 100644 --- a/integration_tests/src/main/python/join_test.py +++ b/integration_tests/src/main/python/join_test.py @@ -96,6 +96,8 @@ 'spark.sql.shuffle.partitions': '2', } +kudo_enabled_conf_key = "spark.rapids.shuffle.kudo.serializer.enabled" + def create_df(spark, data_gen, left_length, right_length): left = binary_op_df(spark, data_gen, length=left_length) right = binary_op_df(spark, data_gen, length=right_length).withColumnRenamed("a", "r_a")\ @@ -125,53 +127,77 @@ def join_batch_size_test_params(*args): @ignore_order(local=True) @pytest.mark.parametrize('join_type', ['Left', 'Inner', 'LeftSemi', 'LeftAnti'], ids=idfn) @pytest.mark.parametrize("aqe_enabled", ["true", "false"], ids=idfn) -def test_right_broadcast_nested_loop_join_without_condition_empty(join_type, aqe_enabled): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_right_broadcast_nested_loop_join_without_condition_empty(join_type, aqe_enabled, kudo_enabled): def do_join(spark): left, right = create_df(spark, long_gen, 50, 0) return left.join(broadcast(right), how=join_type) - assert_gpu_and_cpu_are_equal_collect(do_join, conf={ "spark.sql.adaptive.enabled": aqe_enabled }) + assert_gpu_and_cpu_are_equal_collect(do_join, conf={ + "spark.sql.adaptive.enabled": aqe_enabled, + kudo_enabled_conf_key: kudo_enabled + }) @ignore_order(local=True) @pytest.mark.parametrize('join_type', ['Left', 'Inner', 'LeftSemi', 'LeftAnti'], ids=idfn) @pytest.mark.parametrize("aqe_enabled", ["true", "false"], ids=idfn) -def test_left_broadcast_nested_loop_join_without_condition_empty(join_type, aqe_enabled): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_left_broadcast_nested_loop_join_without_condition_empty(join_type, aqe_enabled, kudo_enabled): def do_join(spark): left, right = create_df(spark, long_gen, 0, 50) return left.join(broadcast(right), how=join_type) - assert_gpu_and_cpu_are_equal_collect(do_join, conf={ "spark.sql.adaptive.enabled": aqe_enabled }) + assert_gpu_and_cpu_are_equal_collect(do_join, conf={ + "spark.sql.adaptive.enabled": aqe_enabled, + kudo_enabled_conf_key: kudo_enabled + }) @ignore_order(local=True) @pytest.mark.parametrize('join_type', ['Left', 'Inner', 'LeftSemi', 'LeftAnti'], ids=idfn) 
@pytest.mark.parametrize("aqe_enabled", ["true", "false"], ids=idfn) -def test_broadcast_nested_loop_join_without_condition_empty(join_type, aqe_enabled): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_broadcast_nested_loop_join_without_condition_empty(join_type, aqe_enabled, kudo_enabled): def do_join(spark): left, right = create_df(spark, long_gen, 0, 0) return left.join(broadcast(right), how=join_type) - assert_gpu_and_cpu_are_equal_collect(do_join, conf={ "spark.sql.adaptive.enabled": aqe_enabled }) + assert_gpu_and_cpu_are_equal_collect(do_join, conf={ + "spark.sql.adaptive.enabled": aqe_enabled, + kudo_enabled_conf_key: kudo_enabled + }) @ignore_order(local=True) @pytest.mark.parametrize('join_type', ['Left', 'Inner', 'LeftSemi', 'LeftAnti'], ids=idfn) -def test_right_broadcast_nested_loop_join_without_condition_empty_small_batch(join_type): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_right_broadcast_nested_loop_join_without_condition_empty_small_batch(join_type, kudo_enabled): def do_join(spark): left, right = create_df(spark, long_gen, 50, 0) return left.join(broadcast(right), how=join_type) - assert_gpu_and_cpu_are_equal_collect(do_join, conf={'spark.sql.adaptive.enabled': 'true'}) + assert_gpu_and_cpu_are_equal_collect(do_join, conf={ + 'spark.sql.adaptive.enabled': 'true', + kudo_enabled_conf_key: kudo_enabled + }) @ignore_order(local=True) @pytest.mark.parametrize('join_type', ['Left', 'Right', 'Inner', 'LeftSemi', 'LeftAnti'], ids=idfn) -def test_empty_broadcast_hash_join(join_type): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_empty_broadcast_hash_join(join_type, kudo_enabled): def do_join(spark): left, right = create_df(spark, long_gen, 50, 0) return left.join(right.hint("broadcast"), left.a == right.r_a, join_type) - assert_gpu_and_cpu_are_equal_collect(do_join, conf={'spark.sql.adaptive.enabled': 'true'}) + assert_gpu_and_cpu_are_equal_collect(do_join, conf={ + 'spark.sql.adaptive.enabled': 'true', + kudo_enabled_conf_key: kudo_enabled + }) @pytest.mark.parametrize('join_type', ['Left', 'Inner', 'LeftSemi', 'LeftAnti'], ids=idfn) -def test_broadcast_hash_join_constant_keys(join_type): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_broadcast_hash_join_constant_keys(join_type, kudo_enabled): def do_join(spark): left = spark.range(10).withColumn("s", lit(1)) right = spark.range(10000).withColumn("r_s", lit(1)) return left.join(right.hint("broadcast"), left.s == right.r_s, join_type) - assert_gpu_and_cpu_row_counts_equal(do_join, conf={'spark.sql.adaptive.enabled': 'true'}) + assert_gpu_and_cpu_row_counts_equal(do_join, conf={ + 'spark.sql.adaptive.enabled': 'true', + kudo_enabled_conf_key: kudo_enabled + }) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 @@ -181,21 +207,29 @@ def do_join(spark): (all_gen, '1g'), (join_small_batch_gens, '1000')), ids=idfn) @pytest.mark.parametrize('join_type', all_join_types, ids=idfn) -def test_sortmerge_join(data_gen, join_type, batch_size): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_sortmerge_join(data_gen, join_type, batch_size, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 500, 500) return left.join(right, left.a == right.r_a, join_type) - conf = copy_and_update(_sortmerge_join_conf, {'spark.rapids.sql.batchSizeBytes': batch_size}) + conf = copy_and_update(_sortmerge_join_conf, { + 
'spark.rapids.sql.batchSizeBytes': batch_size, + kudo_enabled_conf_key: kudo_enabled + }) assert_gpu_and_cpu_are_equal_collect(do_join, conf=conf) @ignore_order(local=True) @pytest.mark.parametrize('data_gen', basic_nested_gens + [decimal_gen_128bit], ids=idfn) @pytest.mark.parametrize('join_type', all_join_types, ids=idfn) -def test_sortmerge_join_ridealong(data_gen, join_type): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_sortmerge_join_ridealong(data_gen, join_type, kudo_enabled): def do_join(spark): left, right = create_ridealong_df(spark, short_gen, data_gen, 500, 500) return left.join(right, left.key == right.r_key, join_type) - assert_gpu_and_cpu_are_equal_collect(do_join, conf=_sortmerge_join_conf) + conf = copy_and_update(_sortmerge_join_conf, { + kudo_enabled_conf_key: kudo_enabled + }) + assert_gpu_and_cpu_are_equal_collect(do_join, conf=conf) # For floating point values the normalization is done using a higher order function. We could probably work around this # for now it falls back to the CPU @@ -205,11 +239,15 @@ def do_join(spark): @ignore_order(local=True) @pytest.mark.parametrize('data_gen', single_level_array_gens + [binary_gen], ids=idfn) @pytest.mark.parametrize('join_type', all_join_types, ids=idfn) -def test_sortmerge_join_wrong_key_fallback(data_gen, join_type): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_sortmerge_join_wrong_key_fallback(data_gen, join_type, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 500, 500) return left.join(right, left.a == right.r_a, join_type) - assert_gpu_fallback_collect(do_join, 'SortMergeJoinExec', conf=_sortmerge_join_conf) + conf = copy_and_update(_sortmerge_join_conf, { + kudo_enabled_conf_key: kudo_enabled + }) + assert_gpu_fallback_collect(do_join, 'SortMergeJoinExec', conf=conf) # For spark to insert a shuffled hash join it has to be enabled with # "spark.sql.join.preferSortMergeJoin" = "false" and both sides have to @@ -231,10 +269,12 @@ def do_join(spark): @pytest.mark.parametrize('data_gen', basic_nested_gens + [decimal_gen_128bit], ids=idfn) @pytest.mark.parametrize('join_type', all_non_sized_join_types, ids=idfn) @pytest.mark.parametrize('sub_part_enabled', ['false', 'true'], ids=['SubPartition_OFF', 'SubPartition_ON']) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_hash_join_ridealong_non_sized(data_gen, join_type, sub_part_enabled): +def test_hash_join_ridealong_non_sized(data_gen, join_type, sub_part_enabled, kudo_enabled): confs = { - "spark.rapids.sql.test.subPartitioning.enabled": sub_part_enabled + "spark.rapids.sql.test.subPartitioning.enabled": sub_part_enabled, + kudo_enabled_conf_key: kudo_enabled } hash_join_ridealong(data_gen, join_type, confs) @@ -242,10 +282,12 @@ def test_hash_join_ridealong_non_sized(data_gen, join_type, sub_part_enabled): @ignore_order(local=True) @pytest.mark.parametrize('data_gen', basic_nested_gens + [decimal_gen_128bit], ids=idfn) @pytest.mark.parametrize('join_type', all_symmetric_sized_join_types, ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_hash_join_ridealong_symmetric(data_gen, join_type): +def test_hash_join_ridealong_symmetric(data_gen, join_type, kudo_enabled): confs = { "spark.rapids.sql.join.useShuffledSymmetricHashJoin": "true", + kudo_enabled_conf_key: kudo_enabled } hash_join_ridealong(data_gen, join_type, confs) @@ -253,10 +295,12 @@ 
def test_hash_join_ridealong_symmetric(data_gen, join_type): @ignore_order(local=True) @pytest.mark.parametrize('data_gen', basic_nested_gens + [decimal_gen_128bit], ids=idfn) @pytest.mark.parametrize('join_type', all_asymmetric_sized_join_types, ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_hash_join_ridealong_asymmetric(data_gen, join_type): +def test_hash_join_ridealong_asymmetric(data_gen, join_type, kudo_enabled): confs = { "spark.rapids.sql.join.useShuffledAsymmetricHashJoin": "true", + kudo_enabled_conf_key: kudo_enabled } hash_join_ridealong(data_gen, join_type, confs) @@ -267,24 +311,29 @@ def test_hash_join_ridealong_asymmetric(data_gen, join_type): # Not all join types can be translated to a broadcast join, but this tests them to be sure we # can handle what spark is doing @pytest.mark.parametrize('join_type', all_join_types, ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_broadcast_join_right_table(data_gen, join_type): +def test_broadcast_join_right_table(data_gen, join_type, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 500, 250) return left.join(broadcast(right), left.a == right.r_a, join_type) - assert_gpu_and_cpu_are_equal_collect(do_join) + conf = {kudo_enabled_conf_key: kudo_enabled} + assert_gpu_and_cpu_are_equal_collect(do_join, conf = conf) @ignore_order(local=True) @pytest.mark.parametrize('data_gen', basic_nested_gens + [decimal_gen_128bit], ids=idfn) # Not all join types can be translated to a broadcast join, but this tests them to be sure we # can handle what spark is doing @pytest.mark.parametrize('join_type', all_join_types, ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_broadcast_join_right_table_ridealong(data_gen, join_type): +def test_broadcast_join_right_table_ridealong(data_gen, join_type, kudo_enabled): def do_join(spark): left, right = create_ridealong_df(spark, short_gen, data_gen, 500, 500) return left.join(broadcast(right), left.key == right.r_key, join_type) - assert_gpu_and_cpu_are_equal_collect(do_join) + + conf = {kudo_enabled_conf_key: kudo_enabled} + assert_gpu_and_cpu_are_equal_collect(do_join, conf = conf) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 # After 3.1.0 is the min spark version we can drop this @@ -293,13 +342,16 @@ def do_join(spark): # Not all join types can be translated to a broadcast join, but this tests them to be sure we # can handle what spark is doing @pytest.mark.parametrize('join_type', all_join_types, ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_broadcast_join_right_table_with_job_group(data_gen, join_type): +def test_broadcast_join_right_table_with_job_group(data_gen, join_type, kudo_enabled): with_cpu_session(lambda spark : spark.sparkContext.setJobGroup("testjob1", "test", False)) def do_join(spark): left, right = create_df(spark, data_gen, 500, 250) return left.join(broadcast(right), left.a == right.r_a, join_type) - assert_gpu_and_cpu_are_equal_collect(do_join) + + conf = {kudo_enabled_conf_key: kudo_enabled} + assert_gpu_and_cpu_are_equal_collect(do_join, conf = conf) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 # After 3.1.0 is the min spark version we can drop this @@ -308,12 +360,16 @@ def do_join(spark): 
@pytest.mark.parametrize('data_gen,batch_size', join_batch_size_test_params( (all_gen + basic_nested_gens, '1g'), (join_small_batch_gens + [basic_struct_gen, ArrayGen(string_gen)], '100')), ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_cartesian_join(data_gen, batch_size): +def test_cartesian_join(data_gen, batch_size, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 50, 25) return left.crossJoin(right) - assert_gpu_and_cpu_are_equal_collect(do_join, conf={'spark.rapids.sql.batchSizeBytes': batch_size}) + assert_gpu_and_cpu_are_equal_collect(do_join, conf={ + 'spark.rapids.sql.batchSizeBytes': batch_size, + kudo_enabled_conf_key: kudo_enabled + }) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 # After 3.1.0 is the min spark version we can drop this @@ -322,11 +378,15 @@ def do_join(spark): @pytest.mark.xfail(condition=is_databricks_runtime(), reason='https://github.com/NVIDIA/spark-rapids/issues/334') @pytest.mark.parametrize('batch_size', ['100', '1g'], ids=idfn) # set the batch size so we can test multiple stream batches -def test_cartesian_join_special_case_count(batch_size): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_cartesian_join_special_case_count(batch_size, kudo_enabled): def do_join(spark): left, right = create_df(spark, int_gen, 50, 25) return left.crossJoin(right).selectExpr('COUNT(*)') - assert_gpu_and_cpu_are_equal_collect(do_join, conf={'spark.rapids.sql.batchSizeBytes': batch_size}) + assert_gpu_and_cpu_are_equal_collect(do_join, conf={ + 'spark.rapids.sql.batchSizeBytes': batch_size, + kudo_enabled_conf_key: kudo_enabled + }) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 # After 3.1.0 is the min spark version we can drop this @@ -335,11 +395,15 @@ def do_join(spark): @pytest.mark.xfail(condition=is_databricks_runtime(), reason='https://github.com/NVIDIA/spark-rapids/issues/334') @pytest.mark.parametrize('batch_size', ['1000', '1g'], ids=idfn) # set the batch size so we can test multiple stream batches -def test_cartesian_join_special_case_group_by_count(batch_size): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_cartesian_join_special_case_group_by_count(batch_size, kudo_enabled): def do_join(spark): left, right = create_df(spark, int_gen, 50, 25) return left.crossJoin(right).groupBy('a').count() - assert_gpu_and_cpu_are_equal_collect(do_join, conf={'spark.rapids.sql.batchSizeBytes': batch_size}) + assert_gpu_and_cpu_are_equal_collect(do_join, conf={ + 'spark.rapids.sql.batchSizeBytes': batch_size, + kudo_enabled_conf_key: kudo_enabled + }) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 # After 3.1.0 is the min spark version we can drop this @@ -348,8 +412,9 @@ def do_join(spark): @pytest.mark.parametrize('data_gen,batch_size', join_batch_size_test_params( (all_gen, '1g'), (join_small_batch_gens, '100')), ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_cartesian_join_with_condition(data_gen, batch_size): +def test_cartesian_join_with_condition(data_gen, batch_size, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 50, 25) # This test is impacted by https://github.com/NVIDIA/spark-rapids/issues/294 @@ -357,7 +422,10 @@ def do_join(spark): # but these take a long time to verify so we run with smaller numbers by default # that 
do not expose the error return left.join(right, left.b >= right.r_b, "cross") - conf = copy_and_update(_sortmerge_join_conf, {'spark.rapids.sql.batchSizeBytes': batch_size}) + conf = copy_and_update(_sortmerge_join_conf, { + 'spark.rapids.sql.batchSizeBytes': batch_size, + kudo_enabled_conf_key: kudo_enabled + }) assert_gpu_and_cpu_are_equal_collect(do_join, conf=conf) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 @@ -366,22 +434,30 @@ def do_join(spark): @pytest.mark.parametrize('data_gen,batch_size', join_batch_size_test_params( (all_gen + basic_nested_gens, '1g'), (join_small_batch_gens, '100')), ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_broadcast_nested_loop_join(data_gen, batch_size): +def test_broadcast_nested_loop_join(data_gen, batch_size, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 50, 25) return left.crossJoin(broadcast(right)) - assert_gpu_and_cpu_are_equal_collect(do_join, conf={'spark.rapids.sql.batchSizeBytes': batch_size}) + assert_gpu_and_cpu_are_equal_collect(do_join, conf={ + 'spark.rapids.sql.batchSizeBytes': batch_size, + kudo_enabled_conf_key: kudo_enabled + }) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 # After 3.1.0 is the min spark version we can drop this @ignore_order(local=True) @pytest.mark.parametrize('batch_size', ['100', '1g'], ids=idfn) # set the batch size so we can test multiple stream batches -def test_broadcast_nested_loop_join_special_case_count(batch_size): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_broadcast_nested_loop_join_special_case_count(batch_size, kudo_enabled): def do_join(spark): left, right = create_df(spark, int_gen, 50, 25) return left.crossJoin(broadcast(right)).selectExpr('COUNT(*)') - assert_gpu_and_cpu_are_equal_collect(do_join, conf={'spark.rapids.sql.batchSizeBytes': batch_size}) + assert_gpu_and_cpu_are_equal_collect(do_join, conf={ + 'spark.rapids.sql.batchSizeBytes': batch_size, + kudo_enabled_conf_key: kudo_enabled + }) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 # After 3.1.0 is the min spark version we can drop this @@ -389,11 +465,15 @@ def do_join(spark): @pytest.mark.xfail(condition=is_databricks_runtime(), reason='https://github.com/NVIDIA/spark-rapids/issues/334') @pytest.mark.parametrize('batch_size', ['1000', '1g'], ids=idfn) # set the batch size so we can test multiple stream batches -def test_broadcast_nested_loop_join_special_case_group_by_count(batch_size): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_broadcast_nested_loop_join_special_case_group_by_count(batch_size, kudo_enabled): def do_join(spark): left, right = create_df(spark, int_gen, 50, 25) return left.crossJoin(broadcast(right)).groupBy('a').count() - assert_gpu_and_cpu_are_equal_collect(do_join, conf={'spark.rapids.sql.batchSizeBytes': batch_size}) + assert_gpu_and_cpu_are_equal_collect(do_join, conf={ + 'spark.rapids.sql.batchSizeBytes': batch_size, + kudo_enabled_conf_key: kudo_enabled + }) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 # After 3.1.0 is the min spark version we can drop this @@ -402,8 +482,9 @@ def do_join(spark): (join_ast_gen, '1g'), ([int_gen], 100)), ids=idfn) @pytest.mark.parametrize('join_type', ['Left', 'Inner', 'LeftSemi', 'LeftAnti', 'Cross'], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) 
@allow_non_gpu(*non_utc_allow) -def test_right_broadcast_nested_loop_join_with_ast_condition(data_gen, join_type, batch_size): +def test_right_broadcast_nested_loop_join_with_ast_condition(data_gen, join_type, batch_size, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 50, 25) # This test is impacted by https://github.com/NVIDIA/spark-rapids/issues/294 @@ -411,14 +492,18 @@ def do_join(spark): # but these take a long time to verify so we run with smaller numbers by default # that do not expose the error return left.join(broadcast(right), (left.b >= right.r_b), join_type) - assert_gpu_and_cpu_are_equal_collect(do_join, conf={'spark.rapids.sql.batchSizeBytes': batch_size}) + assert_gpu_and_cpu_are_equal_collect(do_join, conf={ + 'spark.rapids.sql.batchSizeBytes': batch_size, + kudo_enabled_conf_key: kudo_enabled + }) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 # After 3.1.0 is the min spark version we can drop this @ignore_order(local=True) @pytest.mark.parametrize('data_gen', join_ast_gen, ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_left_broadcast_nested_loop_join_with_ast_condition(data_gen): +def test_left_broadcast_nested_loop_join_with_ast_condition(data_gen, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 50, 25) # This test is impacted by https://github.com/NVIDIA/spark-rapids/issues/294 @@ -426,14 +511,15 @@ def do_join(spark): # but these take a long time to verify so we run with smaller numbers by default # that do not expose the error return broadcast(left).join(right, (left.b >= right.r_b), 'Right') - assert_gpu_and_cpu_are_equal_collect(do_join) + assert_gpu_and_cpu_are_equal_collect(do_join, conf = {kudo_enabled_conf_key: kudo_enabled}) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 # After 3.1.0 is the min spark version we can drop this @ignore_order(local=True) @pytest.mark.parametrize('data_gen', [IntegerGen(), LongGen(), pytest.param(FloatGen(), marks=[incompat]), pytest.param(DoubleGen(), marks=[incompat])], ids=idfn) @pytest.mark.parametrize('join_type', ['Inner', 'Cross'], ids=idfn) -def test_broadcast_nested_loop_join_with_condition_post_filter(data_gen, join_type): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_broadcast_nested_loop_join_with_condition_post_filter(data_gen, join_type, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 50, 25) # This test is impacted by https://github.com/NVIDIA/spark-rapids/issues/294 @@ -442,12 +528,13 @@ def do_join(spark): # that do not expose the error # AST does not support cast or logarithm yet, so this must be implemented as a post-filter return left.join(broadcast(right), left.a > f.log(right.r_a), join_type) - assert_gpu_and_cpu_are_equal_collect(do_join) + assert_gpu_and_cpu_are_equal_collect(do_join, conf = {kudo_enabled_conf_key: kudo_enabled}) @ignore_order(local=True) @pytest.mark.parametrize('data_gen', [IntegerGen(), LongGen(), pytest.param(FloatGen(), marks=[incompat]), pytest.param(DoubleGen(), marks=[incompat])], ids=idfn) @pytest.mark.parametrize('join_type', ['Cross', 'Left', 'LeftSemi', 'LeftAnti'], ids=idfn) -def test_broadcast_nested_loop_join_with_condition(data_gen, join_type): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_broadcast_nested_loop_join_with_condition(data_gen, join_type, kudo_enabled): def do_join(spark): left, right = 
create_df(spark, data_gen, 50, 25) # AST does not support cast or logarithm yet which is supposed to be extracted into child @@ -458,39 +545,46 @@ def do_join(spark): # (1) adapt double to integer since AST current doesn't support it. # (2) switch to right side build to pass checks of 'Left', 'LeftSemi', 'LeftAnti' join types return left.join(broadcast(right), f.round(left.a).cast('integer') > f.round(f.log(right.r_a).cast('integer')), join_type) - assert_gpu_and_cpu_are_equal_collect(do_join, conf={"spark.rapids.sql.castFloatToIntegralTypes.enabled": True}) + assert_gpu_and_cpu_are_equal_collect(do_join, conf={ + "spark.rapids.sql.castFloatToIntegralTypes.enabled": True, + kudo_enabled_conf_key: kudo_enabled + }) @allow_non_gpu('BroadcastExchangeExec', 'BroadcastNestedLoopJoinExec', 'Cast', 'GreaterThan', 'Log') @ignore_order(local=True) @pytest.mark.parametrize('data_gen', [IntegerGen(), LongGen(), pytest.param(FloatGen(), marks=[incompat]), pytest.param(DoubleGen(), marks=[incompat])], ids=idfn) @pytest.mark.parametrize('join_type', ['Left', 'Right', 'FullOuter', 'LeftSemi', 'LeftAnti'], ids=idfn) -def test_broadcast_nested_loop_join_with_condition_fallback(data_gen, join_type): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_broadcast_nested_loop_join_with_condition_fallback(data_gen, join_type, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 50, 25) # AST does not support double type which is not split-able into child nodes. return broadcast(left).join(right, left.a > f.log(right.r_a), join_type) - assert_gpu_fallback_collect(do_join, 'BroadcastNestedLoopJoinExec') + assert_gpu_fallback_collect(do_join, 'BroadcastNestedLoopJoinExec', + conf = {kudo_enabled_conf_key: kudo_enabled}) @ignore_order(local=True) @pytest.mark.parametrize('data_gen', [byte_gen, short_gen, int_gen, long_gen, float_gen, double_gen, string_gen, boolean_gen, date_gen, timestamp_gen], ids=idfn) @pytest.mark.parametrize('join_type', ['Left', 'Right', 'FullOuter', 'LeftSemi', 'LeftAnti'], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_broadcast_nested_loop_join_with_array_contains(data_gen, join_type): +def test_broadcast_nested_loop_join_with_array_contains(data_gen, join_type, kudo_enabled): arr_gen = ArrayGen(data_gen) literal = with_cpu_session(lambda spark: gen_scalar(data_gen)) def do_join(spark): left, right = create_df(spark, arr_gen, 50, 25) # Array_contains will be pushed down into project child nodes return broadcast(left).join(right, array_contains(left.a, literal.cast(data_gen.data_type)) < array_contains(right.r_a, literal.cast(data_gen.data_type))) - assert_gpu_and_cpu_are_equal_collect(do_join) + assert_gpu_and_cpu_are_equal_collect(do_join, conf = {kudo_enabled_conf_key: kudo_enabled}) @ignore_order(local=True) @pytest.mark.parametrize('data_gen', all_gen, ids=idfn) @pytest.mark.parametrize('join_type', ['Left', 'LeftSemi', 'LeftAnti'], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_right_broadcast_nested_loop_join_condition_missing(data_gen, join_type): +def test_right_broadcast_nested_loop_join_condition_missing(data_gen, join_type, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 50, 25) # This test is impacted by https://github.com/NVIDIA/spark-rapids/issues/294 @@ -500,13 +594,14 @@ def do_join(spark): # Compute the distinct of the join result to verify the 
join produces a proper dataframe # for downstream processing. return left.join(broadcast(right), how=join_type).distinct() - assert_gpu_and_cpu_are_equal_collect(do_join) + assert_gpu_and_cpu_are_equal_collect(do_join, conf = {kudo_enabled_conf_key: kudo_enabled}) @ignore_order(local=True) @pytest.mark.parametrize('data_gen', all_gen, ids=idfn) @pytest.mark.parametrize('join_type', ['Right'], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_left_broadcast_nested_loop_join_condition_missing(data_gen, join_type): +def test_left_broadcast_nested_loop_join_condition_missing(data_gen, join_type, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 50, 25) # This test is impacted by https://github.com/NVIDIA/spark-rapids/issues/294 @@ -516,45 +611,52 @@ def do_join(spark): # Compute the distinct of the join result to verify the join produces a proper dataframe # for downstream processing. return broadcast(left).join(right, how=join_type).distinct() - assert_gpu_and_cpu_are_equal_collect(do_join) + assert_gpu_and_cpu_are_equal_collect(do_join, conf = {kudo_enabled_conf_key: kudo_enabled}) @pytest.mark.parametrize('data_gen', all_gen + single_level_array_gens + [binary_gen], ids=idfn) @pytest.mark.parametrize('join_type', ['Left', 'LeftSemi', 'LeftAnti'], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_right_broadcast_nested_loop_join_condition_missing_count(data_gen, join_type): +def test_right_broadcast_nested_loop_join_condition_missing_count(data_gen, join_type, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 50, 25) return left.join(broadcast(right), how=join_type).selectExpr('COUNT(*)') - assert_gpu_and_cpu_are_equal_collect(do_join) + assert_gpu_and_cpu_are_equal_collect(do_join, conf = {kudo_enabled_conf_key: kudo_enabled}) @pytest.mark.parametrize('data_gen', all_gen + single_level_array_gens + [binary_gen], ids=idfn) @pytest.mark.parametrize('join_type', ['Right'], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_left_broadcast_nested_loop_join_condition_missing_count(data_gen, join_type): +def test_left_broadcast_nested_loop_join_condition_missing_count(data_gen, join_type, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 50, 25) return broadcast(left).join(right, how=join_type).selectExpr('COUNT(*)') - assert_gpu_and_cpu_are_equal_collect(do_join) + assert_gpu_and_cpu_are_equal_collect(do_join, conf = {kudo_enabled_conf_key: kudo_enabled}) @allow_non_gpu('BroadcastExchangeExec', 'BroadcastNestedLoopJoinExec', 'GreaterThanOrEqual', *non_utc_allow) @ignore_order(local=True) @pytest.mark.parametrize('data_gen', all_gen, ids=idfn) @pytest.mark.parametrize('join_type', ['LeftOuter', 'LeftSemi', 'LeftAnti', 'FullOuter'], ids=idfn) -def test_broadcast_nested_loop_join_with_conditionals_build_left_fallback(data_gen, join_type): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_broadcast_nested_loop_join_with_conditionals_build_left_fallback(data_gen, join_type, + kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 50, 25) return broadcast(left).join(right, (left.b >= right.r_b), join_type) - assert_gpu_fallback_collect(do_join, 'BroadcastNestedLoopJoinExec') + assert_gpu_fallback_collect(do_join, 'BroadcastNestedLoopJoinExec', + conf = 
{kudo_enabled_conf_key: kudo_enabled}) @allow_non_gpu('BroadcastExchangeExec', 'BroadcastNestedLoopJoinExec', 'GreaterThanOrEqual', *non_utc_allow) @ignore_order(local=True) @pytest.mark.parametrize('data_gen', all_gen, ids=idfn) @pytest.mark.parametrize('join_type', ['RightOuter', 'FullOuter'], ids=idfn) -def test_broadcast_nested_loop_with_conditionals_build_right_fallback(data_gen, join_type): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_broadcast_nested_loop_with_conditionals_build_right_fallback(data_gen, join_type, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 50, 25) return left.join(broadcast(right), (left.b >= right.r_b), join_type) - assert_gpu_fallback_collect(do_join, 'BroadcastNestedLoopJoinExec') + assert_gpu_fallback_collect(do_join, 'BroadcastNestedLoopJoinExec', + conf = {kudo_enabled_conf_key: kudo_enabled}) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 # After 3.1.0 is the min spark version we can drop this @@ -566,25 +668,28 @@ def do_join(spark): # Specify 200 shuffle partitions to test cases where streaming side is empty # as in https://github.com/NVIDIA/spark-rapids/issues/7516 @pytest.mark.parametrize('shuffle_conf', [{}, {'spark.sql.shuffle.partitions': 200}], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_broadcast_join_left_table(data_gen, join_type, shuffle_conf): +def test_broadcast_join_left_table(data_gen, join_type, shuffle_conf, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 250, 500) return broadcast(left).join(right, left.a == right.r_a, join_type) - assert_gpu_and_cpu_are_equal_collect(do_join, conf=shuffle_conf) + conf = copy_and_update(shuffle_conf, {kudo_enabled_conf_key: kudo_enabled}) + assert_gpu_and_cpu_are_equal_collect(do_join, conf=conf) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 # After 3.1.0 is the min spark version we can drop this @ignore_order(local=True) @pytest.mark.parametrize('data_gen', join_ast_gen, ids=idfn) @pytest.mark.parametrize('join_type', all_join_types, ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_broadcast_join_with_conditionals(data_gen, join_type): +def test_broadcast_join_with_conditionals(data_gen, join_type, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 500, 250) return left.join(broadcast(right), (left.a == right.r_a) & (left.b >= right.r_b), join_type) - assert_gpu_and_cpu_are_equal_collect(do_join) + assert_gpu_and_cpu_are_equal_collect(do_join, conf = {kudo_enabled_conf_key: kudo_enabled}) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 # After 3.1.0 is the min spark version we can drop this @@ -592,14 +697,15 @@ def do_join(spark): @ignore_order(local=True) @pytest.mark.parametrize('data_gen', [long_gen], ids=idfn) @pytest.mark.parametrize('join_type', ['Left', 'Right', 'FullOuter', 'LeftSemi', 'LeftAnti'], ids=idfn) -def test_broadcast_join_with_condition_ast_op_fallback(data_gen, join_type): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_broadcast_join_with_condition_ast_op_fallback(data_gen, join_type, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 50, 25) # AST does not support cast or logarithm yet return left.join(broadcast(right), (left.a == right.r_a) & (left.b > f.log(right.r_b)), join_type) 
exec = 'SortMergeJoinExec' if join_type in ['Right', 'FullOuter'] else 'BroadcastHashJoinExec' - assert_gpu_fallback_collect(do_join, exec) + assert_gpu_fallback_collect(do_join, exec, conf = {kudo_enabled_conf_key: kudo_enabled}) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 # After 3.1.0 is the min spark version we can drop this @@ -607,38 +713,42 @@ def do_join(spark): @ignore_order(local=True) @pytest.mark.parametrize('data_gen', join_no_ast_gen, ids=idfn) @pytest.mark.parametrize('join_type', ['Left', 'Right', 'FullOuter', 'LeftSemi', 'LeftAnti'], ids=idfn) -def test_broadcast_join_with_condition_ast_type_fallback(data_gen, join_type): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_broadcast_join_with_condition_ast_type_fallback(data_gen, join_type, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 50, 25) # AST does not support cast or logarithm yet return left.join(broadcast(right), (left.a == right.r_a) & (left.b > right.r_b), join_type) exec = 'SortMergeJoinExec' if join_type in ['Right', 'FullOuter'] else 'BroadcastHashJoinExec' - assert_gpu_fallback_collect(do_join, exec) + assert_gpu_fallback_collect(do_join, exec, conf = {kudo_enabled_conf_key: kudo_enabled}) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 # After 3.1.0 is the min spark version we can drop this @ignore_order(local=True) @pytest.mark.parametrize('data_gen', join_no_ast_gen, ids=idfn) @pytest.mark.parametrize('join_type', ['Inner', 'Cross'], ids=idfn) -def test_broadcast_join_with_condition_post_filter(data_gen, join_type): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_broadcast_join_with_condition_post_filter(data_gen, join_type, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 500, 250) return left.join(broadcast(right), (left.a == right.r_a) & (left.b > right.r_b), join_type) - assert_gpu_and_cpu_are_equal_collect(do_join) + assert_gpu_and_cpu_are_equal_collect(do_join, conf = {kudo_enabled_conf_key: kudo_enabled}) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 # After 3.1.0 is the min spark version we can drop this @ignore_order(local=True) @pytest.mark.parametrize('data_gen', join_ast_gen, ids=idfn) @pytest.mark.parametrize('join_type', ['Left', 'Right', 'Inner', 'FullOuter', 'LeftSemi', 'LeftAnti'], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_sortmerge_join_with_condition_ast(data_gen, join_type): +def test_sortmerge_join_with_condition_ast(data_gen, join_type, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 500, 250) return left.join(right, (left.a == right.r_a) & (left.b >= right.r_b), join_type) - assert_gpu_and_cpu_are_equal_collect(do_join, conf=_sortmerge_join_conf) + conf = copy_and_update(_sortmerge_join_conf, {kudo_enabled_conf_key: kudo_enabled}) + assert_gpu_and_cpu_are_equal_collect(do_join, conf=conf) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 # After 3.1.0 is the min spark version we can drop this @@ -646,12 +756,14 @@ def do_join(spark): @ignore_order(local=True) @pytest.mark.parametrize('data_gen', [long_gen], ids=idfn) @pytest.mark.parametrize('join_type', ['Left', 'Right', 'FullOuter', 'LeftSemi', 'LeftAnti'], ids=idfn) -def test_sortmerge_join_with_condition_ast_op_fallback(data_gen, join_type): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], 
ids=idfn) +def test_sortmerge_join_with_condition_ast_op_fallback(data_gen, join_type, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 500, 250) # AST does not support cast or logarithm yet return left.join(right, (left.a == right.r_a) & (left.b > f.log(right.r_b)), join_type) - assert_gpu_fallback_collect(do_join, 'SortMergeJoinExec', conf=_sortmerge_join_conf) + conf = copy_and_update(_sortmerge_join_conf, {kudo_enabled_conf_key: kudo_enabled}) + assert_gpu_fallback_collect(do_join, 'SortMergeJoinExec', conf=conf) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 # After 3.1.0 is the min spark version we can drop this @@ -659,11 +771,13 @@ def do_join(spark): @ignore_order(local=True) @pytest.mark.parametrize('data_gen', join_no_ast_gen, ids=idfn) @pytest.mark.parametrize('join_type', ['Left', 'Right', 'FullOuter', 'LeftSemi', 'LeftAnti'], ids=idfn) -def test_sortmerge_join_with_condition_ast_type_fallback(data_gen, join_type): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_sortmerge_join_with_condition_ast_type_fallback(data_gen, join_type, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 500, 250) return left.join(right, (left.a == right.r_a) & (left.b > right.r_b), join_type) - assert_gpu_fallback_collect(do_join, 'SortMergeJoinExec', conf=_sortmerge_join_conf) + conf = copy_and_update(_sortmerge_join_conf, {kudo_enabled_conf_key: kudo_enabled}) + assert_gpu_fallback_collect(do_join, 'SortMergeJoinExec', conf=conf) _mixed_df1_with_nulls = [('a', RepeatSeqGen(LongGen(nullable=(True, 20.0)), length= 10)), @@ -674,20 +788,22 @@ def do_join(spark): @ignore_order @pytest.mark.parametrize('join_type', ['Left', 'Right', 'Inner', 'LeftSemi', 'LeftAnti', 'FullOuter', 'Cross'], ids=idfn) -def test_broadcast_join_mixed(join_type): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_broadcast_join_mixed(join_type, kudo_enabled): def do_join(spark): left = gen_df(spark, _mixed_df1_with_nulls, length=500) right = gen_df(spark, _mixed_df2_with_nulls, length=500).withColumnRenamed("a", "r_a")\ .withColumnRenamed("b", "r_b").withColumnRenamed("c", "r_c") return left.join(broadcast(right), left.a.eqNullSafe(right.r_a), join_type) - assert_gpu_and_cpu_are_equal_collect(do_join) + assert_gpu_and_cpu_are_equal_collect(do_join, conf={kudo_enabled_conf_key: kudo_enabled}) @ignore_order @allow_non_gpu('DataWritingCommandExec,ExecutedCommandExec,WriteFilesExec') @pytest.mark.xfail(condition=is_emr_runtime(), reason='https://github.com/NVIDIA/spark-rapids/issues/821') @pytest.mark.parametrize('repartition', ["true", "false"], ids=idfn) -def test_join_bucketed_table(repartition, spark_tmp_table_factory): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_join_bucketed_table(repartition, spark_tmp_table_factory, kudo_enabled): def do_join(spark): table_name = spark_tmp_table_factory.get() data = [("http://fooblog.com/blog-entry-116.html", "https://fooblog.com/blog-entry-116.html"), @@ -702,7 +818,10 @@ def do_join(spark): return testurls.repartition(20).join(resolved, "Url", "inner") else: return testurls.join(resolved, "Url", "inner") - assert_gpu_and_cpu_are_equal_collect(do_join, conf={'spark.sql.autoBroadcastJoinThreshold': '-1'}) + assert_gpu_and_cpu_are_equal_collect(do_join, conf={ + 'spark.sql.autoBroadcastJoinThreshold': '-1', + kudo_enabled_conf_key: kudo_enabled + }) # Because we disable ShuffleExchangeExec in some cases we need to 
allow it to not be on the GPU # and we do the result sorting in python to avoid that shuffle also being off the GPU @@ -711,7 +830,8 @@ def do_join(spark): @pytest.mark.parametrize('join_type', ['Left', 'Right', 'Inner', 'LeftSemi', 'LeftAnti'], ids=idfn) @pytest.mark.parametrize('cache_side', ['cache_left', 'cache_right'], ids=idfn) @pytest.mark.parametrize('cpu_side', ['cache', 'not_cache'], ids=idfn) -def test_half_cache_join(join_type, cache_side, cpu_side): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_half_cache_join(join_type, cache_side, cpu_side, kudo_enabled): left_gen = [('a', SetValuesGen(LongType(), range(500))), ('b', IntegerGen())] right_gen = [('r_a', SetValuesGen(LongType(), range(500))), ('c', LongGen())] def do_join(spark): @@ -743,46 +863,56 @@ def do_join(spark): # Even though Spark does not know the size of an RDD input so it will not do a broadcast join unless # we tell it to, this is just to be safe - assert_gpu_and_cpu_are_equal_collect(do_join, {'spark.sql.autoBroadcastJoinThreshold': '1'}) + assert_gpu_and_cpu_are_equal_collect(do_join, { + 'spark.sql.autoBroadcastJoinThreshold': '1', + kudo_enabled_conf_key: kudo_enabled + }) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 # After 3.1.0 is the min spark version we can drop this @ignore_order(local=True) @pytest.mark.parametrize('data_gen', struct_gens, ids=idfn) @pytest.mark.parametrize('join_type', ['Inner', 'Left', 'Right', 'Cross', 'LeftSemi', 'LeftAnti'], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_sortmerge_join_struct_as_key(data_gen, join_type): +def test_sortmerge_join_struct_as_key(data_gen, join_type, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 500, 250) return left.join(right, left.a == right.r_a, join_type) - assert_gpu_and_cpu_are_equal_collect(do_join, conf=_sortmerge_join_conf) + conf = copy_and_update(_sortmerge_join_conf, {kudo_enabled_conf_key: kudo_enabled}) + assert_gpu_and_cpu_are_equal_collect(do_join, conf=conf) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 # After 3.1.0 is the min spark version we can drop this @ignore_order(local=True) @pytest.mark.parametrize('data_gen', struct_gens, ids=idfn) @pytest.mark.parametrize('join_type', ['Inner', 'Left', 'Right', 'Cross', 'LeftSemi', 'LeftAnti'], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_sortmerge_join_struct_mixed_key(data_gen, join_type): +def test_sortmerge_join_struct_mixed_key(data_gen, join_type, kudo_enabled): def do_join(spark): left = two_col_df(spark, data_gen, int_gen, length=500) right = two_col_df(spark, data_gen, int_gen, length=500) return left.join(right, (left.a == right.a) & (left.b == right.b), join_type) - assert_gpu_and_cpu_are_equal_collect(do_join, conf=_sortmerge_join_conf) + conf = copy_and_update(_sortmerge_join_conf, {kudo_enabled_conf_key: kudo_enabled}) + assert_gpu_and_cpu_are_equal_collect(do_join, conf=conf) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 # After 3.1.0 is the min spark version we can drop this @ignore_order(local=True) @pytest.mark.parametrize('data_gen', struct_gens, ids=idfn) @pytest.mark.parametrize('join_type', ['Inner', 'Left', 'Right', 'Cross', 'LeftSemi', 'LeftAnti'], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn)
@allow_non_gpu(*non_utc_allow) -def test_sortmerge_join_struct_mixed_key_with_null_filter(data_gen, join_type): +def test_sortmerge_join_struct_mixed_key_with_null_filter(data_gen, join_type, kudo_enabled): def do_join(spark): left = two_col_df(spark, data_gen, int_gen, length=500) right = two_col_df(spark, data_gen, int_gen, length=500) return left.join(right, (left.a == right.a) & (left.b == right.b), join_type) # Disable constraintPropagation to test null filter on built table with nullable structures. - conf = {'spark.sql.constraintPropagation.enabled': 'false', **_sortmerge_join_conf} + conf = {'spark.sql.constraintPropagation.enabled': 'false', + kudo_enabled_conf_key: kudo_enabled, + **_sortmerge_join_conf} assert_gpu_and_cpu_are_equal_collect(do_join, conf=conf) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 @@ -790,25 +920,27 @@ def do_join(spark): @ignore_order(local=True) @pytest.mark.parametrize('data_gen', struct_gens, ids=idfn) @pytest.mark.parametrize('join_type', ['Inner', 'Left', 'Right', 'Cross', 'LeftSemi', 'LeftAnti'], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_broadcast_join_right_struct_as_key(data_gen, join_type): +def test_broadcast_join_right_struct_as_key(data_gen, join_type, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 500, 250) return left.join(broadcast(right), left.a == right.r_a, join_type) - assert_gpu_and_cpu_are_equal_collect(do_join) + assert_gpu_and_cpu_are_equal_collect(do_join, conf = {kudo_enabled_conf_key: kudo_enabled}) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 # After 3.1.0 is the min spark version we can drop this @ignore_order(local=True) @pytest.mark.parametrize('data_gen', struct_gens, ids=idfn) @pytest.mark.parametrize('join_type', ['Inner', 'Left', 'Right', 'Cross', 'LeftSemi', 'LeftAnti'], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_broadcast_join_right_struct_mixed_key(data_gen, join_type): +def test_broadcast_join_right_struct_mixed_key(data_gen, join_type, kudo_enabled): def do_join(spark): left = two_col_df(spark, data_gen, int_gen, length=500) right = two_col_df(spark, data_gen, int_gen, length=250) return left.join(broadcast(right), (left.a == right.a) & (left.b == right.b), join_type) - assert_gpu_and_cpu_are_equal_collect(do_join) + assert_gpu_and_cpu_are_equal_collect(do_join, conf = {kudo_enabled_conf_key: kudo_enabled}) # local sort because of https://github.com/NVIDIA/spark-rapids/issues/84 # After 3.1.0 is the min spark version we can drop this @@ -816,11 +948,14 @@ def do_join(spark): @pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/2140') @pytest.mark.parametrize('data_gen', [basic_struct_gen_with_floats], ids=idfn) @pytest.mark.parametrize('join_type', ['Inner', 'Left', 'Right', 'Cross', 'LeftSemi', 'LeftAnti'], ids=idfn) -def test_sortmerge_join_struct_with_floats_key(data_gen, join_type): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_sortmerge_join_struct_with_floats_key(data_gen, join_type, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 500, 250) return left.join(right, left.a == right.r_a, join_type) - assert_gpu_and_cpu_are_equal_collect(do_join, conf=_sortmerge_join_conf) + conf = copy_and_update(_sortmerge_join_conf, + {kudo_enabled_conf_key: kudo_enabled}) + 
assert_gpu_and_cpu_are_equal_collect(do_join, conf=conf) @allow_non_gpu('SortMergeJoinExec', 'SortExec', 'NormalizeNaNAndZero', 'CreateNamedStruct', 'GetStructField', 'Literal', 'If', 'IsNull', 'ShuffleExchangeExec', 'HashPartitioning', @@ -828,15 +963,19 @@ def do_join(spark): @ignore_order(local=True) @pytest.mark.parametrize('data_gen', struct_gens, ids=idfn) @pytest.mark.parametrize('join_type', ['FullOuter'], ids=idfn) -def test_sortmerge_join_struct_as_key_fallback(data_gen, join_type): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_sortmerge_join_struct_as_key_fallback(data_gen, join_type, kudo_enabled): def do_join(spark): left, right = create_df(spark, data_gen, 500, 500) return left.join(right, left.a == right.r_a, join_type) - assert_gpu_fallback_collect(do_join, 'SortMergeJoinExec', conf=_sortmerge_join_conf) + conf = copy_and_update(_sortmerge_join_conf, + {kudo_enabled_conf_key: kudo_enabled}) + assert_gpu_fallback_collect(do_join, 'SortMergeJoinExec', conf=conf) # Regression test for https://github.com/NVIDIA/spark-rapids/issues/3775 @ignore_order(local=True) -def test_struct_self_join(spark_tmp_table_factory): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_struct_self_join(spark_tmp_table_factory, kudo_enabled): def do_join(spark): data = [ (("Adam ", "", "Green"), "1", "M", 1000), @@ -863,7 +1002,7 @@ def do_join(spark): resultdf.createOrReplaceTempView(resultdf_name) return spark.sql("select a.* from {} a, {} b where a.name=b.name".format( resultdf_name, resultdf_name)) - assert_gpu_and_cpu_are_equal_collect(do_join) + assert_gpu_and_cpu_are_equal_collect(do_join, conf = {kudo_enabled_conf_key: kudo_enabled}) # ExistenceJoin occurs in the context of existential subqueries (which is rewritten to SemiJoin) if # there is an additional condition that may qualify left records even though they don't have @@ -883,7 +1022,9 @@ def do_join(spark): ]) @pytest.mark.parametrize('conditionalJoin', [False, True], ids=['ast:off', 'ast:on']) @pytest.mark.parametrize('forceBroadcastHashJoin', [False, True], ids=['broadcastHJ:off', 'broadcastHJ:on']) -def test_existence_join(numComplementsToExists, aqeEnabled, conditionalJoin, forceBroadcastHashJoin, spark_tmp_table_factory): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_existence_join(numComplementsToExists, aqeEnabled, conditionalJoin, + forceBroadcastHashJoin, spark_tmp_table_factory, kudo_enabled): leftTable = spark_tmp_table_factory.get() rightTable = spark_tmp_table_factory.get() def do_join(spark): @@ -933,12 +1074,14 @@ def do_join(spark): assert_cpu_and_gpu_are_equal_collect_with_capture(do_join, existenceJoinRegex, conf={ "spark.sql.adaptive.enabled": aqeEnabled, - "spark.sql.autoBroadcastJoinThreshold": bhjThreshold + "spark.sql.autoBroadcastJoinThreshold": bhjThreshold, + kudo_enabled_conf_key: kudo_enabled }) @ignore_order @pytest.mark.parametrize('aqeEnabled', [True, False], ids=['aqe:on', 'aqe:off']) -def test_existence_join_in_broadcast_nested_loop_join(spark_tmp_table_factory, aqeEnabled): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_existence_join_in_broadcast_nested_loop_join(spark_tmp_table_factory, aqeEnabled, kudo_enabled): left_table_name = spark_tmp_table_factory.get() right_table_name = spark_tmp_table_factory.get() @@ -958,11 +1101,13 @@ def do_join(spark): capture_regexp = r"GpuBroadcastNestedLoopJoin ExistenceJoin\(exists#[0-9]+\)," 
assert_cpu_and_gpu_are_equal_collect_with_capture(do_join, capture_regexp, - conf={"spark.sql.adaptive.enabled": aqeEnabled}) + conf={"spark.sql.adaptive.enabled": aqeEnabled, + kudo_enabled_conf_key: kudo_enabled}) @ignore_order @pytest.mark.parametrize('aqeEnabled', [True, False], ids=['aqe:on', 'aqe:off']) -def test_degenerate_broadcast_nested_loop_existence_join(spark_tmp_table_factory, aqeEnabled): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_degenerate_broadcast_nested_loop_existence_join(spark_tmp_table_factory, aqeEnabled, kudo_enabled): left_table_name = spark_tmp_table_factory.get() right_table_name = spark_tmp_table_factory.get() @@ -982,13 +1127,15 @@ def do_join(spark): capture_regexp = r"GpuBroadcastNestedLoopJoin ExistenceJoin\(exists#[0-9]+\)," assert_cpu_and_gpu_are_equal_collect_with_capture(do_join, capture_regexp, - conf={"spark.sql.adaptive.enabled": aqeEnabled}) + conf={"spark.sql.adaptive.enabled": aqeEnabled, + kudo_enabled_conf_key: kudo_enabled}) @ignore_order(local=True) @pytest.mark.parametrize('data_gen', [StringGen(), IntegerGen()], ids=idfn) @pytest.mark.parametrize("aqe_enabled", [True, False], ids=idfn) @pytest.mark.parametrize("join_reorder_enabled", [True, False], ids=idfn) -def test_multi_table_hash_join(data_gen, aqe_enabled, join_reorder_enabled): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_multi_table_hash_join(data_gen, aqe_enabled, join_reorder_enabled, kudo_enabled): def do_join(spark): t1 = binary_op_df(spark, data_gen, length=1000) t2 = binary_op_df(spark, data_gen, length=800) @@ -999,14 +1146,15 @@ def do_join(spark): .join(t4, t3.a == t4.a, 'Inner') conf = copy_and_update(_hash_join_conf, { 'spark.sql.adaptive.enabled': aqe_enabled, - 'spark.rapids.sql.optimizer.joinReorder.enabled': join_reorder_enabled + 'spark.rapids.sql.optimizer.joinReorder.enabled': join_reorder_enabled, + kudo_enabled_conf_key: kudo_enabled }) assert_gpu_and_cpu_are_equal_collect(do_join, conf=conf) limited_integral_gens = [byte_gen, ShortGen(max_val=BYTE_MAX), IntegerGen(max_val=BYTE_MAX), LongGen(max_val=BYTE_MAX)] -def hash_join_different_key_integral_types(left_gen, right_gen, join_type): +def hash_join_different_key_integral_types(left_gen, right_gen, join_type, kudo_enabled): def do_join(spark): left = unary_op_df(spark, left_gen, length=50) right = unary_op_df(spark, right_gen, length=500) @@ -1014,7 +1162,8 @@ def do_join(spark): _all_conf = copy_and_update(_hash_join_conf, { "spark.rapids.sql.join.useShuffledSymmetricHashJoin": "true", "spark.rapids.sql.join.useShuffledAsymmetricHashJoin": "true", - "spark.rapids.sql.test.subPartitioning.enabled": True + "spark.rapids.sql.test.subPartitioning.enabled": True, + kudo_enabled_conf_key: kudo_enabled }) assert_gpu_and_cpu_are_equal_collect(do_join, conf=_all_conf) @@ -1023,24 +1172,27 @@ def do_join(spark): @pytest.mark.parametrize('left_gen', limited_integral_gens, ids=idfn) @pytest.mark.parametrize('right_gen', limited_integral_gens, ids=idfn) @pytest.mark.parametrize('join_type', all_non_sized_join_types, ids=idfn) -def test_hash_join_different_key_integral_types_non_sized(left_gen, right_gen, join_type): - hash_join_different_key_integral_types(left_gen, right_gen, join_type) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_join_different_key_integral_types_non_sized(left_gen, right_gen, join_type, kudo_enabled): + hash_join_different_key_integral_types(left_gen, right_gen, join_type, kudo_enabled) 
@validate_execs_in_gpu_plan('GpuShuffledSymmetricHashJoinExec') @ignore_order(local=True) @pytest.mark.parametrize('left_gen', limited_integral_gens, ids=idfn) @pytest.mark.parametrize('right_gen', limited_integral_gens, ids=idfn) @pytest.mark.parametrize('join_type', all_symmetric_sized_join_types, ids=idfn) -def test_hash_join_different_key_integral_types_symmetric(left_gen, right_gen, join_type): - hash_join_different_key_integral_types(left_gen, right_gen, join_type) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_join_different_key_integral_types_symmetric(left_gen, right_gen, join_type, kudo_enabled): + hash_join_different_key_integral_types(left_gen, right_gen, join_type, kudo_enabled) @validate_execs_in_gpu_plan('GpuShuffledAsymmetricHashJoinExec') @ignore_order(local=True) @pytest.mark.parametrize('left_gen', limited_integral_gens, ids=idfn) @pytest.mark.parametrize('right_gen', limited_integral_gens, ids=idfn) @pytest.mark.parametrize('join_type', all_asymmetric_sized_join_types, ids=idfn) -def test_hash_join_different_key_integral_types_asymmetric(left_gen, right_gen, join_type): - hash_join_different_key_integral_types(left_gen, right_gen, join_type) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_join_different_key_integral_types_asymmetric(left_gen, right_gen, join_type, kudo_enabled): + hash_join_different_key_integral_types(left_gen, right_gen, join_type, kudo_enabled) bloom_filter_confs = { @@ -1068,8 +1220,10 @@ def do_join(spark): @pytest.mark.parametrize("is_multi_column", [False, True], ids=idfn) @pytest.mark.skipif(is_databricks_runtime(), reason="https://github.com/NVIDIA/spark-rapids/issues/8921") @pytest.mark.skipif(is_before_spark_330(), reason="Bloom filter joins added in Spark 3.3.0") -def test_bloom_filter_join(batch_size, is_multi_column): - conf = {"spark.rapids.sql.batchSizeBytes": batch_size} +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_bloom_filter_join(batch_size, is_multi_column, kudo_enabled): + conf = {"spark.rapids.sql.batchSizeBytes": batch_size, + kudo_enabled_conf_key: kudo_enabled} check_bloom_filter_join(confs=conf, expected_classes="GpuBloomFilterMightContain,GpuBloomFilterAggregate", is_multi_column=is_multi_column) @@ -1079,8 +1233,10 @@ def test_bloom_filter_join(batch_size, is_multi_column): @pytest.mark.parametrize("is_multi_column", [False, True], ids=idfn) @pytest.mark.skipif(is_databricks_runtime(), reason="https://github.com/NVIDIA/spark-rapids/issues/8921") @pytest.mark.skipif(is_before_spark_330(), reason="Bloom filter joins added in Spark 3.3.0") -def test_bloom_filter_join_cpu_probe(is_multi_column): - conf = {"spark.rapids.sql.expression.BloomFilterMightContain": "false"} +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_bloom_filter_join_cpu_probe(is_multi_column, kudo_enabled): + conf = {"spark.rapids.sql.expression.BloomFilterMightContain": "false", + kudo_enabled_conf_key: kudo_enabled} check_bloom_filter_join(confs=conf, expected_classes="BloomFilterMightContain,GpuBloomFilterAggregate", is_multi_column=is_multi_column) @@ -1090,8 +1246,10 @@ def test_bloom_filter_join_cpu_probe(is_multi_column): @pytest.mark.parametrize("is_multi_column", [False, True], ids=idfn) @pytest.mark.skipif(is_databricks_runtime(), reason="https://github.com/NVIDIA/spark-rapids/issues/8921") @pytest.mark.skipif(is_before_spark_330(), reason="Bloom filter joins added in Spark 3.3.0") -def 
test_bloom_filter_join_cpu_build(is_multi_column): - conf = {"spark.rapids.sql.expression.BloomFilterAggregate": "false"} +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_bloom_filter_join_cpu_build(is_multi_column, kudo_enabled): + conf = {"spark.rapids.sql.expression.BloomFilterAggregate": "false", + kudo_enabled_conf_key: kudo_enabled} check_bloom_filter_join(confs=conf, expected_classes="GpuBloomFilterMightContain,BloomFilterAggregate", is_multi_column=is_multi_column) @@ -1102,8 +1260,10 @@ def test_bloom_filter_join_cpu_build(is_multi_column): @pytest.mark.parametrize("is_multi_column", [False, True], ids=idfn) @pytest.mark.skipif(is_databricks_runtime(), reason="https://github.com/NVIDIA/spark-rapids/issues/8921") @pytest.mark.skipif(is_before_spark_330(), reason="Bloom filter joins added in Spark 3.3.0") -def test_bloom_filter_join_split_cpu_build(agg_replace_mode, is_multi_column): - conf = {"spark.rapids.sql.hashAgg.replaceMode": agg_replace_mode} +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_bloom_filter_join_split_cpu_build(agg_replace_mode, is_multi_column, kudo_enabled): + conf = {"spark.rapids.sql.hashAgg.replaceMode": agg_replace_mode, + kudo_enabled_conf_key: kudo_enabled} check_bloom_filter_join(confs=conf, expected_classes="GpuBloomFilterMightContain,BloomFilterAggregate,GpuBloomFilterAggregate", is_multi_column=is_multi_column) @@ -1111,14 +1271,16 @@ def test_bloom_filter_join_split_cpu_build(agg_replace_mode, is_multi_column): @ignore_order(local=True) @pytest.mark.skipif(is_databricks_runtime(), reason="https://github.com/NVIDIA/spark-rapids/issues/8921") @pytest.mark.skipif(is_before_spark_330(), reason="Bloom filter joins added in Spark 3.3.0") -def test_bloom_filter_join_with_merge_some_null_filters(spark_tmp_path): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_bloom_filter_join_with_merge_some_null_filters(spark_tmp_path, kudo_enabled): data_path1 = spark_tmp_path + "/BLOOM_JOIN_DATA1" data_path2 = spark_tmp_path + "/BLOOM_JOIN_DATA2" with_cpu_session(lambda spark: spark.range(100000).coalesce(1).write.parquet(data_path1)) with_cpu_session(lambda spark: spark.range(100000).withColumn("id2", col("id").cast("string"))\ .coalesce(1).write.parquet(data_path2)) confs = copy_and_update(bloom_filter_confs, - {"spark.sql.files.maxPartitionBytes": "1000"}) + {"spark.sql.files.maxPartitionBytes": "1000", + kudo_enabled_conf_key: kudo_enabled}) def do_join(spark): left = spark.read.parquet(data_path1) right = spark.read.parquet(data_path2) @@ -1128,7 +1290,8 @@ def do_join(spark): @ignore_order(local=True) @pytest.mark.skipif(is_databricks_runtime(), reason="https://github.com/NVIDIA/spark-rapids/issues/8921") @pytest.mark.skipif(is_before_spark_330(), reason="Bloom filter joins added in Spark 3.3.0") -def test_bloom_filter_join_with_merge_all_null_filters(spark_tmp_path): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_bloom_filter_join_with_merge_all_null_filters(spark_tmp_path, kudo_enabled): data_path1 = spark_tmp_path + "/BLOOM_JOIN_DATA1" data_path2 = spark_tmp_path + "/BLOOM_JOIN_DATA2" with_cpu_session(lambda spark: spark.range(100000).write.parquet(data_path1)) @@ -1138,13 +1301,15 @@ def do_join(spark): left = spark.read.parquet(data_path1) right = spark.read.parquet(data_path2) return right.filter("cast(id2 as bigint) % 3 = 4").join(left, left.id == right.id, "inner") - assert_gpu_and_cpu_are_equal_collect(do_join, 
bloom_filter_confs) + conf = copy_and_update(bloom_filter_confs, {kudo_enabled_conf_key: kudo_enabled}) + assert_gpu_and_cpu_are_equal_collect(do_join, conf) @ignore_order(local=True) @allow_non_gpu("ProjectExec", "FilterExec", "BroadcastHashJoinExec", "ColumnarToRowExec", "BroadcastExchangeExec", "BatchScanExec") @pytest.mark.parametrize("disable_build", [True, False]) -def test_broadcast_hash_join_fix_fallback_by_inputfile(spark_tmp_path, disable_build): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_broadcast_hash_join_fix_fallback_by_inputfile(spark_tmp_path, disable_build, kudo_enabled): data_path_parquet = spark_tmp_path + "/parquet" data_path_orc = spark_tmp_path + "/orc" # The smaller one (orc) will be the build side (a broadcast) @@ -1174,13 +1339,15 @@ def do_join(spark): do_join, conf={"spark.sql.autoBroadcastJoinThreshold": "10M", "spark.sql.sources.useV1SourceList": "", - "spark.rapids.sql.input." + scan_name: False}) + "spark.rapids.sql.input." + scan_name: False, + kudo_enabled_conf_key: kudo_enabled}) @ignore_order(local=True) @allow_non_gpu("ProjectExec", "BroadcastNestedLoopJoinExec", "ColumnarToRowExec", "BroadcastExchangeExec", "BatchScanExec") @pytest.mark.parametrize("disable_build", [True, False]) -def test_broadcast_nested_join_fix_fallback_by_inputfile(spark_tmp_path, disable_build): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_broadcast_nested_join_fix_fallback_by_inputfile(spark_tmp_path, disable_build, kudo_enabled): data_path_parquet = spark_tmp_path + "/parquet" data_path_orc = spark_tmp_path + "/orc" # The smaller one (orc) will be the build side (a broadcast) @@ -1209,14 +1376,17 @@ def do_join(spark): do_join, conf={"spark.sql.autoBroadcastJoinThreshold": "-1", "spark.sql.sources.useV1SourceList": "", - "spark.rapids.sql.input." + scan_name: False}) + "spark.rapids.sql.input." 
+ scan_name: False, + kudo_enabled_conf_key: kudo_enabled}) @ignore_order(local=True) @pytest.mark.parametrize("join_type", ["Inner", "LeftOuter", "RightOuter"], ids=idfn) @pytest.mark.parametrize("batch_size", ["500", "1g"], ids=idfn) -def test_distinct_join(join_type, batch_size): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_distinct_join(join_type, batch_size, kudo_enabled): join_conf = { - "spark.rapids.sql.batchSizeBytes": batch_size + "spark.rapids.sql.batchSizeBytes": batch_size, + kudo_enabled_conf_key: kudo_enabled } def do_join(spark): left_df = spark.range(1024).withColumn("x", f.col("id") + 1) @@ -1230,13 +1400,15 @@ def do_join(spark): @pytest.mark.parametrize("is_right_host_shuffle", [False, True], ids=idfn) @pytest.mark.parametrize("is_left_smaller", [False, True], ids=idfn) @pytest.mark.parametrize("batch_size", ["1024", "1g"], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) def test_sized_join(join_type, is_left_host_shuffle, is_right_host_shuffle, - is_left_smaller, batch_size): + is_left_smaller, batch_size, kudo_enabled): join_conf = { "spark.rapids.sql.join.useShuffledSymmetricHashJoin": "true", "spark.rapids.sql.join.useShuffledAsymmetricHashJoin": "true", "spark.sql.autoBroadcastJoinThreshold": "1", - "spark.rapids.sql.batchSizeBytes": batch_size + "spark.rapids.sql.batchSizeBytes": batch_size, + kudo_enabled_conf_key: kudo_enabled } left_size, right_size = (2048, 1024) if is_left_smaller else (1024, 2048) def do_join(spark): @@ -1266,7 +1438,8 @@ def do_join(spark): @pytest.mark.parametrize("is_left_smaller", [False, True], ids=idfn) @pytest.mark.parametrize("is_ast_supported", [False, True], ids=idfn) @pytest.mark.parametrize("batch_size", ["1024", "1g"], ids=idfn) -def test_sized_join_conditional(join_type, is_ast_supported, is_left_smaller, batch_size): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_sized_join_conditional(join_type, is_ast_supported, is_left_smaller, batch_size, kudo_enabled): if join_type != "Inner" and not is_ast_supported: pytest.skip("Only inner joins support a non-AST condition") join_conf = { @@ -1274,7 +1447,8 @@ def test_sized_join_conditional(join_type, is_ast_supported, is_left_smaller, ba "spark.rapids.sql.join.useShuffledAsymmetricHashJoin": "true", "spark.rapids.sql.join.use" "spark.sql.autoBroadcastJoinThreshold": "1", - "spark.rapids.sql.batchSizeBytes": batch_size + "spark.rapids.sql.batchSizeBytes": batch_size, + kudo_enabled_conf_key: kudo_enabled } left_size, right_size = (2048, 1024) if is_left_smaller else (1024, 2048) def do_join(spark): @@ -1300,13 +1474,15 @@ def do_join(spark): @pytest.mark.parametrize("is_left_replicated", [False, True], ids=idfn) @pytest.mark.parametrize("is_conditional", [False, True], ids=idfn) @pytest.mark.parametrize("is_outer_side_small", [False, True], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) def test_sized_join_high_key_replication(join_type, is_left_replicated, is_conditional, - is_outer_side_small): + is_outer_side_small, kudo_enabled): join_conf = { "spark.rapids.sql.join.useShuffledSymmetricHashJoin": "true", "spark.rapids.sql.join.useShuffledAsymmetricHashJoin": "true", "spark.rapids.sql.join.use" - "spark.sql.autoBroadcastJoinThreshold": "1" + "spark.sql.autoBroadcastJoinThreshold": "1", + kudo_enabled_conf_key: kudo_enabled } left_size, right_size = (30000, 40000) left_key_gen, right_key_gen = ( diff --git 
a/integration_tests/src/main/python/json_matrix_test.py b/integration_tests/src/main/python/json_matrix_test.py index 136a4b041f8..50fbe9745dc 100644 --- a/integration_tests/src/main/python/json_matrix_test.py +++ b/integration_tests/src/main/python/json_matrix_test.py @@ -66,6 +66,7 @@ def read_json_as_text(spark, data_path, column_name): WITH_COMMENTS_FILE = "withComments.json" WITH_COMMENTS_SCHEMA = StructType([StructField("str", StringType())]) +WITH_COMMENTS_MAP_SCHEMA = MapType(StringType(), StringType()) @allow_non_gpu('FileSourceScanExec') @pytest.mark.parametrize('read_func', [read_json_df, read_json_sql]) @@ -86,6 +87,14 @@ def test_from_json_allow_comments_on(std_input_path): 'JsonToStructs', conf =_enable_json_to_structs_conf) +@allow_non_gpu(TEXT_INPUT_EXEC, 'ProjectExec') +def test_from_json_allow_comments_on_map(std_input_path): + schema = WITH_COMMENTS_MAP_SCHEMA + assert_gpu_fallback_collect( + lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_COMMENTS_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {'allowComments': "true"})), + 'JsonToStructs', + conf =_enable_json_to_structs_conf) + # Off is the default so it really needs to work @pytest.mark.parametrize('read_func', [read_json_df, read_json_sql]) def test_scan_json_allow_comments_off(std_input_path, read_func, spark_tmp_table_factory): @@ -104,6 +113,14 @@ def test_from_json_allow_comments_off(std_input_path): lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_COMMENTS_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {'allowComments': "false"})), conf =_enable_json_to_structs_conf) +# Off is the default so it really needs to work +@allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 +def test_from_json_allow_comments_off_map(std_input_path): + schema = WITH_COMMENTS_MAP_SCHEMA + assert_gpu_and_cpu_are_equal_collect( + lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_COMMENTS_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {'allowComments': "false"})), + conf =_enable_json_to_structs_conf) + # Off is the default so it really needs to work @allow_non_gpu(TEXT_INPUT_EXEC) def test_get_json_object_allow_comments_off(std_input_path): @@ -119,24 +136,30 @@ def test_json_tuple_allow_comments_off(std_input_path): WITH_SQ_FILE = "withSingleQuotes.json" WITH_SQ_SCHEMA = StructType([StructField("str", StringType())]) +WITH_SQ_MAP_SCHEMA = MapType(StringType(), StringType()) @allow_non_gpu('FileSourceScanExec') @pytest.mark.parametrize('read_func', [read_json_df, read_json_sql]) def test_scan_json_allow_single_quotes_off(std_input_path, read_func, spark_tmp_table_factory): - assert_gpu_fallback_collect( + assert_gpu_and_cpu_are_equal_collect( read_func(std_input_path + '/' + WITH_SQ_FILE, WITH_SQ_SCHEMA, spark_tmp_table_factory, {"allowSingleQuotes": "false"}), - 'FileSourceScanExec', conf=_enable_all_types_json_scan_conf) @allow_non_gpu('ProjectExec', TEXT_INPUT_EXEC) def test_from_json_allow_single_quotes_off(std_input_path): schema = WITH_SQ_SCHEMA - assert_gpu_fallback_collect( + assert_gpu_and_cpu_are_equal_collect( + lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_SQ_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {'allowSingleQuotes': "false"})), + conf =_enable_json_to_structs_conf) + +@allow_non_gpu('ProjectExec', TEXT_INPUT_EXEC) +def test_from_json_allow_single_quotes_off_map(std_input_path): + schema = 
WITH_SQ_MAP_SCHEMA + assert_gpu_and_cpu_are_equal_collect( lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_SQ_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {'allowSingleQuotes': "false"})), - 'JsonToStructs', conf =_enable_json_to_structs_conf) # On is the default so it really needs to work @@ -157,6 +180,14 @@ def test_from_json_allow_single_quotes_on(std_input_path): lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_SQ_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {'allowSingleQuotes': "true"})), conf =_enable_json_to_structs_conf) +# On is the default so it really needs to work +@allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 +def test_from_json_allow_single_quotes_on_map(std_input_path): + schema = WITH_SQ_MAP_SCHEMA + assert_gpu_and_cpu_are_equal_collect( + lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_SQ_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {'allowSingleQuotes': "true"})), + conf =_enable_json_to_structs_conf) + # On is the default so it really needs to work @allow_non_gpu(TEXT_INPUT_EXEC) def test_get_json_object_allow_single_quotes_on(std_input_path): @@ -172,6 +203,7 @@ def test_json_tuple_allow_single_quotes_on(std_input_path): WITH_UNQUOTE_FIELD_NAMES_FILE = "withUnquotedFieldNames.json" WITH_UNQUOTE_FIELD_NAMES_SCHEMA = StructType([StructField("str", StringType())]) +WITH_UNQUOTE_FIELD_NAMES_MAP_SCHEMA = MapType(StringType(), StringType()) @allow_non_gpu('FileSourceScanExec') @pytest.mark.parametrize('read_func', [read_json_df, read_json_sql]) @@ -192,6 +224,14 @@ def test_from_json_allow_unquoted_field_names_on(std_input_path): 'JsonToStructs', conf =_enable_json_to_structs_conf) +@allow_non_gpu('ProjectExec', TEXT_INPUT_EXEC) +def test_from_json_allow_unquoted_field_names_on_map(std_input_path): + schema = WITH_UNQUOTE_FIELD_NAMES_MAP_SCHEMA + assert_gpu_fallback_collect( + lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_UNQUOTE_FIELD_NAMES_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {'allowUnquotedFieldNames': "true"})), + 'JsonToStructs', + conf =_enable_json_to_structs_conf) + # Off is the default so it really needs to work @pytest.mark.parametrize('read_func', [read_json_df, read_json_sql]) def test_scan_json_allow_unquoted_field_names_off(std_input_path, read_func, spark_tmp_table_factory): @@ -204,12 +244,20 @@ def test_scan_json_allow_unquoted_field_names_off(std_input_path, read_func, spa # Off is the default so it really needs to work @allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 -def test_from_json_allow_unquoted_field_names_on(std_input_path): +def test_from_json_allow_unquoted_field_names_off(std_input_path): schema = WITH_UNQUOTE_FIELD_NAMES_SCHEMA assert_gpu_and_cpu_are_equal_collect( lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_UNQUOTE_FIELD_NAMES_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {'allowUnquotedFieldNames': "false"})), conf =_enable_json_to_structs_conf) +# Off is the default so it really needs to work +@allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 +def test_from_json_allow_unquoted_field_names_off_map(std_input_path): + schema = WITH_UNQUOTE_FIELD_NAMES_MAP_SCHEMA + assert_gpu_and_cpu_are_equal_collect( + lambda spark : read_json_as_text(spark, 
std_input_path + '/' + WITH_UNQUOTE_FIELD_NAMES_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {'allowUnquotedFieldNames': "false"})), + conf =_enable_json_to_structs_conf) + # Off is the default so it really needs to work @allow_non_gpu(TEXT_INPUT_EXEC) def test_get_json_object_allow_unquoted_field_names_off(std_input_path): @@ -228,6 +276,7 @@ def test_json_tuple_allow_unquoted_field_names_off(std_input_path): StructField("int", IntegerType()), StructField("float", FloatType()), StructField("decimal", DecimalType(10, 3))]) +WITH_NUMERIC_LEAD_ZEROS_MAP_SCHEMA = MapType(StringType(), StringType()) @approximate_float() @pytest.mark.parametrize('read_func', [read_json_df, read_json_sql]) @@ -247,6 +296,13 @@ def test_from_json_allow_numeric_leading_zeros_on(std_input_path): lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_NUMERIC_LEAD_ZEROS_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {"allowNumericLeadingZeros": "true"})), conf =_enable_json_to_structs_conf) +@allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 +def test_from_json_allow_numeric_leading_zeros_on_map(std_input_path): + schema = WITH_NUMERIC_LEAD_ZEROS_MAP_SCHEMA + assert_gpu_and_cpu_are_equal_collect( + lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_NUMERIC_LEAD_ZEROS_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {"allowNumericLeadingZeros": "true"})), + conf =_enable_json_to_structs_conf) + # Off is the default so it really needs to work @approximate_float() @pytest.mark.parametrize('read_func', [read_json_df, read_json_sql]) @@ -268,6 +324,14 @@ def test_from_json_allow_numeric_leading_zeros_off(std_input_path): lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_NUMERIC_LEAD_ZEROS_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {"allowNumericLeadingZeros": "false"})), conf =_enable_json_to_structs_conf) +@allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 +def test_from_json_allow_numeric_leading_zeros_off_map(std_input_path): + schema = WITH_NUMERIC_LEAD_ZEROS_MAP_SCHEMA + + assert_gpu_and_cpu_are_equal_collect( + lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_NUMERIC_LEAD_ZEROS_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {"allowNumericLeadingZeros": "false"})), + conf =_enable_json_to_structs_conf) + # Off is the default so it really needs to work @allow_non_gpu(TEXT_INPUT_EXEC) def test_get_json_object_allow_numeric_leading_zeros_off(std_input_path): @@ -286,6 +350,7 @@ def test_json_tuple_allow_numeric_leading_zeros_off(std_input_path): WITH_NONNUMERIC_NUMBERS_SCHEMA = StructType([ StructField("float", FloatType()), StructField("double", DoubleType())]) +WITH_NONNUMERIC_NUMBERS_MAP_SCHEMA = MapType(StringType(), StringType()) @approximate_float() @pytest.mark.parametrize('read_func', [read_json_df, read_json_sql]) @@ -307,6 +372,14 @@ def test_from_json_allow_nonnumeric_numbers_off(std_input_path): lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_NONNUMERIC_NUMBERS_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {"allowNonNumericNumbers": "false"})), conf =_enable_json_to_structs_conf) +@allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 +@pytest.mark.xfail(condition = is_before_spark_330(), reason = 
'https://github.com/NVIDIA/spark-rapids/issues/10493') +def test_from_json_allow_nonnumeric_numbers_off_map(std_input_path): + schema = WITH_NONNUMERIC_NUMBERS_MAP_SCHEMA + assert_gpu_and_cpu_are_equal_collect( + lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_NONNUMERIC_NUMBERS_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {"allowNonNumericNumbers": "false"})), + conf =_enable_json_to_structs_conf) + # On is the default for scan so it really needs to work @approximate_float() @pytest.mark.parametrize('read_func', [read_json_df, read_json_sql]) @@ -329,6 +402,14 @@ def test_from_json_allow_nonnumeric_numbers_on(std_input_path): lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_NONNUMERIC_NUMBERS_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {"allowNonNumericNumbers": "true"})), conf =_enable_json_to_structs_conf) +@allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 +@pytest.mark.xfail(condition = is_before_spark_330(), reason = 'https://github.com/NVIDIA/spark-rapids/issues/10493') +def test_from_json_allow_nonnumeric_numbers_on_map(std_input_path): + schema = WITH_NONNUMERIC_NUMBERS_MAP_SCHEMA + assert_gpu_and_cpu_are_equal_collect( + lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_NONNUMERIC_NUMBERS_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {"allowNonNumericNumbers": "true"})), + conf =_enable_json_to_structs_conf) + # Off is the default for get_json_object so we want this to work @allow_non_gpu(TEXT_INPUT_EXEC) def test_get_json_object_allow_nonnumeric_numbers_off(std_input_path): @@ -346,6 +427,7 @@ def test_json_tuple_allow_nonnumeric_numbers_off(std_input_path): WITH_BS_ESC_FILE = "withBackslashEscapingAnyCharacter.json" WITH_BS_ESC_SCHEMA = StructType([ StructField("str", StringType())]) +WITH_BS_ESC_MAP_SCHEMA = MapType(StringType(), StringType()) # Off is the default for scan so it really needs to work @pytest.mark.parametrize('read_func', [read_json_df]) # we have done so many tests already that we don't need both read func. 
They are the same @@ -365,6 +447,14 @@ def test_from_json_allow_backslash_escape_any_off(std_input_path): lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_BS_ESC_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {"allowBackslashEscapingAnyCharacter": "false"})), conf =_enable_json_to_structs_conf) +# Off is the default for from_json so it really needs to work +@allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 +def test_from_json_allow_backslash_escape_any_off_map(std_input_path): + schema = WITH_BS_ESC_MAP_SCHEMA + assert_gpu_and_cpu_are_equal_collect( + lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_BS_ESC_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {"allowBackslashEscapingAnyCharacter": "false"})), + conf =_enable_json_to_structs_conf) + @allow_non_gpu('FileSourceScanExec') @pytest.mark.parametrize('read_func', [read_json_df, read_json_sql]) def test_scan_json_allow_backslash_escape_any_on(std_input_path, read_func, spark_tmp_table_factory): @@ -384,6 +474,14 @@ def test_from_json_allow_backslash_escape_any_on(std_input_path): 'JsonToStructs', conf =_enable_json_to_structs_conf) +@allow_non_gpu(TEXT_INPUT_EXEC, 'ProjectExec') +def test_from_json_allow_backslash_escape_any_on_map(std_input_path): + schema = WITH_BS_ESC_MAP_SCHEMA + assert_gpu_fallback_collect( + lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_BS_ESC_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {"allowBackslashEscapingAnyCharacter": "true"})), + 'JsonToStructs', + conf =_enable_json_to_structs_conf) + # Off is the default for get_json_object so we want this to work @allow_non_gpu(TEXT_INPUT_EXEC) def test_get_json_object_allow_backslash_escape_any_off(std_input_path): @@ -400,6 +498,7 @@ def test_json_tuple_allow_backslash_escape_any_off(std_input_path): WITH_UNQUOTED_CONTROL_FILE = "withUnquotedControlChars.json" WITH_UNQUOTED_CONTROL_SCHEMA = StructType([ StructField("str", StringType())]) +WITH_UNQUOTED_CONTROL_MAP_SCHEMA = MapType(StringType(), StringType()) # Off is the default for scan so it really needs to work @pytest.mark.parametrize('read_func', [read_json_df, read_json_sql]) @@ -419,6 +518,14 @@ def test_from_json_allow_unquoted_control_chars_off(std_input_path): lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_UNQUOTED_CONTROL_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {"allowUnquotedControlChars": "false"})), conf =_enable_json_to_structs_conf) +# Off is the default for from_json so it really needs to work +@allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 +def test_from_json_allow_unquoted_control_chars_off_map(std_input_path): + schema = WITH_UNQUOTED_CONTROL_MAP_SCHEMA + assert_gpu_and_cpu_are_equal_collect( + lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_UNQUOTED_CONTROL_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {"allowUnquotedControlChars": "false"})), + conf =_enable_json_to_structs_conf) + @pytest.mark.parametrize('read_func', [read_json_df, read_json_sql]) def test_scan_json_allow_unquoted_control_chars_on(std_input_path, read_func, spark_tmp_table_factory): assert_gpu_and_cpu_are_equal_collect( @@ -435,6 +542,13 @@ def test_from_json_allow_unquoted_control_chars_on(std_input_path): lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_UNQUOTED_CONTROL_FILE, 
"json").select(f.col('json'), f.from_json(f.col('json'), schema, {"allowUnquotedControlChars": "true"})), conf =_enable_json_to_structs_conf) +@allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 +def test_from_json_allow_unquoted_control_chars_on_map(std_input_path): + schema = WITH_UNQUOTED_CONTROL_MAP_SCHEMA + assert_gpu_and_cpu_are_equal_collect( + lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_UNQUOTED_CONTROL_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {"allowUnquotedControlChars": "true"})), + conf =_enable_json_to_structs_conf) + # On is the default for get_json_object so we want this to work @allow_non_gpu(TEXT_INPUT_EXEC) def test_get_json_object_allow_unquoted_control_chars_on(std_input_path): @@ -453,6 +567,7 @@ def test_json_tuple_allow_unquoted_control_chars_on(std_input_path): WITH_DEC_LOCALE_NON_ARIBIC_FILE = "decimal_locale_formatted_strings_non_aribic.json" WITH_DEC_LOCALE_SCHEMA = StructType([ StructField("data", DecimalType(10, 5))]) +WITH_DEC_LOCALE_MAP_SCHEMA = MapType(StringType(), StringType()) NON_US_DEC_LOCALES=["it-CH","ko-KR","h-TH-x-lvariant-TH","ru-RU","de-DE","iw-IL","hi-IN","ar-QA","zh-CN","ko-KR"] # US is the default locale so we kind of what it to work @@ -493,6 +608,23 @@ def test_from_json_dec_locale(std_input_path, locale): 'JsonToStructs', conf =_enable_json_to_structs_conf) +@allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 +def test_from_json_dec_locale_US_map(std_input_path): + schema = WITH_DEC_LOCALE_MAP_SCHEMA + assert_gpu_and_cpu_are_equal_collect( + lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_DEC_LOCALE_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema)), + conf =_enable_json_to_structs_conf) + +# This will not fall back because we only support map +# and locals impact decimal parsing, not strings. +@allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 +@pytest.mark.parametrize('locale', NON_US_DEC_LOCALES) +def test_from_json_dec_locale_map(std_input_path, locale): + schema = WITH_DEC_LOCALE_MAP_SCHEMA + assert_gpu_and_cpu_are_equal_collect( + lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_DEC_LOCALE_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {"locale": locale})), + conf =_enable_json_to_structs_conf) + #There is no way to set a locale for these, and it really should not matter @allow_non_gpu(TEXT_INPUT_EXEC) def test_get_json_object_dec_locale(std_input_path): @@ -551,6 +683,25 @@ def test_from_json_dec_locale_non_aribic(std_input_path, locale): 'JsonToStructs', conf =_enable_json_to_structs_conf) +# This will not fail because we only support map +# and decimal is needed to trigger the translation issue +@allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 +def test_from_json_dec_locale_US_non_aribic_map(std_input_path): + schema = WITH_DEC_LOCALE_MAP_SCHEMA + assert_gpu_and_cpu_are_equal_collect( + lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_DEC_LOCALE_NON_ARIBIC_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema)), + conf =_enable_json_to_structs_conf) + +# This will not fall back because we only support map +# and locals impact decimal parsing, not strings. 
+@allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 +@pytest.mark.parametrize('locale', NON_US_DEC_LOCALES) +def test_from_json_dec_locale_non_aribic_map(std_input_path, locale): + schema = WITH_DEC_LOCALE_MAP_SCHEMA + assert_gpu_and_cpu_are_equal_collect( + lambda spark : read_json_as_text(spark, std_input_path + '/' + WITH_DEC_LOCALE_NON_ARIBIC_FILE, "json").select(f.col('json'), f.from_json(f.col('json'), schema, {"locale": locale})), + conf =_enable_json_to_structs_conf) + #There is no way to set a locale for these, and it really should not matter @allow_non_gpu(TEXT_INPUT_EXEC) def test_get_json_object_dec_locale_non_aribic(std_input_path): @@ -577,6 +728,7 @@ def test_json_tuple_dec_locale_non_aribic(std_input_path): "boolean_formatted.json", "int_array_formatted.json", "int_struct_formatted.json", + "int_struct_formatted_problematic_rows.json", "int_mixed_array_struct_formatted.json", "bad_whitespace.json", "escaped_strings.json", @@ -589,6 +741,34 @@ def test_json_tuple_dec_locale_non_aribic(std_input_path): COMMON_SCAN_TEST_FILES = COMMON_TEST_FILES + [ "scan_emtpy_lines.json"] + +@pytest.mark.parametrize('input_file', [ + "int_formatted.json", + "float_formatted.json", + "sci_formatted.json", + "int_formatted_strings.json", + "float_formatted_strings.json", + "sci_formatted_strings.json", + "decimal_locale_formatted_strings.json", + "single_quoted_strings.json", + "boolean_formatted.json", + "int_array_formatted.json", + "int_struct_formatted.json", + "int_mixed_array_struct_formatted.json", + "bad_whitespace.json", + "escaped_strings.json", + "nested_escaped_strings.json", + "repeated_columns.json", # This works for maps, but not others. + "mixed_objects.json", + "timestamp_formatted_strings.json", + "timestamp_tz_formatted_strings.json"]) +@allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 +def test_from_json_map_string_string(std_input_path, input_file): + schema = MapType(StringType(), StringType()) + assert_gpu_and_cpu_are_equal_collect( + lambda spark : read_json_as_text(spark, std_input_path + '/' + input_file, "json").select(f.col('json'), f.from_json(f.col('json'), schema)), + conf =_enable_json_to_structs_conf) + @pytest.mark.parametrize('input_file', COMMON_SCAN_TEST_FILES) @pytest.mark.parametrize('read_func', [read_json_df]) # we have done so many tests already that we don't need both read func. 
They are the same def test_scan_json_bytes(std_input_path, read_func, spark_tmp_table_factory, input_file): @@ -671,6 +851,7 @@ def test_from_json_longs(std_input_path, input_file): "invalid_ridealong_columns.json", "int_array_formatted.json", "int_struct_formatted.json", + "int_struct_formatted_problematic_rows.json", "int_mixed_array_struct_formatted.json", "bad_whitespace.json", "escaped_strings.json", @@ -702,6 +883,7 @@ def test_scan_json_decs(std_input_path, read_func, spark_tmp_table_factory, inpu "invalid_ridealong_columns.json", "int_array_formatted.json", "int_struct_formatted.json", + "int_struct_formatted_problematic_rows.json", "int_mixed_array_struct_formatted.json", "bad_whitespace.json", "escaped_strings.json", @@ -731,10 +913,11 @@ def test_from_json_decs(std_input_path, input_file, dt): "invalid_ridealong_columns.json", pytest.param("int_array_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15318')), "int_struct_formatted.json", + "int_struct_formatted_problematic_rows.json", pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(condition=is_spark_400_or_later(), reason='https://github.com/NVIDIA/spark-rapids/issues/11154')), "bad_whitespace.json", "escaped_strings.json", - pytest.param("nested_escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10534')), + pytest.param("nested_escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11632')), pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361')), "mixed_objects.json", "timestamp_formatted_strings.json", @@ -761,10 +944,11 @@ def test_scan_json_strings(std_input_path, read_func, spark_tmp_table_factory, i "invalid_ridealong_columns.json", pytest.param("int_array_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15318')), "int_struct_formatted.json", + "int_struct_formatted_problematic_rows.json", "int_mixed_array_struct_formatted.json", "bad_whitespace.json", "escaped_strings.json", - pytest.param("nested_escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10534')), + pytest.param("nested_escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11632')), pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361')), "mixed_objects.json", "timestamp_formatted_strings.json", @@ -789,6 +973,7 @@ def test_from_json_strings(std_input_path, input_file): "invalid_ridealong_columns.json", "int_array_formatted.json", "int_struct_formatted.json", + "int_struct_formatted_problematic_rows.json", "int_mixed_array_struct_formatted.json", "bad_whitespace.json", "escaped_strings.json", @@ -818,6 +1003,7 @@ def test_get_json_object_formats(std_input_path, input_file): "invalid_ridealong_columns.json", "int_array_formatted.json", "int_struct_formatted.json", + "int_struct_formatted_problematic_rows.json", "int_mixed_array_struct_formatted.json", "bad_whitespace.json", "escaped_strings.json", @@ -857,6 +1043,7 @@ def test_get_json_object_child_formats(std_input_path, input_file): "invalid_ridealong_columns.json", "int_array_formatted.json", "int_struct_formatted.json", + "int_struct_formatted_problematic_rows.json", "int_mixed_array_struct_formatted.json", "bad_whitespace.json", pytest.param("escaped_strings.json", 
marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11386')), @@ -905,6 +1092,7 @@ def test_from_json_bools(std_input_path, input_file): "boolean_formatted.json", "int_array_formatted.json", "int_struct_formatted.json", + "int_struct_formatted_problematic_rows.json", "int_mixed_array_struct_formatted.json", "bad_whitespace.json", "escaped_strings.json", @@ -935,6 +1123,7 @@ def test_scan_json_floats(std_input_path, read_func, spark_tmp_table_factory, in "boolean_formatted.json", "int_array_formatted.json", "int_struct_formatted.json", + "int_struct_formatted_problematic_rows.json", "int_mixed_array_struct_formatted.json", "bad_whitespace.json", "escaped_strings.json", @@ -963,6 +1152,7 @@ def test_from_json_floats(std_input_path, input_file): "boolean_formatted.json", "int_array_formatted.json", "int_struct_formatted.json", + "int_struct_formatted_problematic_rows.json", "int_mixed_array_struct_formatted.json", "bad_whitespace.json", "escaped_strings.json", @@ -993,6 +1183,7 @@ def test_scan_json_doubles(std_input_path, read_func, spark_tmp_table_factory, i "boolean_formatted.json", "int_array_formatted.json", "int_struct_formatted.json", + "int_struct_formatted_problematic_rows.json", "int_mixed_array_struct_formatted.json", "bad_whitespace.json", "escaped_strings.json", @@ -1020,6 +1211,7 @@ def test_from_json_doubles(std_input_path, input_file): "boolean_formatted.json", "int_array_formatted.json", "int_struct_formatted.json", + "int_struct_formatted_problematic_rows.json", "int_mixed_array_struct_formatted.json", "bad_whitespace.json", pytest.param("escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/9664')), @@ -1032,7 +1224,9 @@ def test_from_json_doubles(std_input_path, input_file): @pytest.mark.parametrize('read_func', [read_json_df]) @allow_non_gpu(*non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 def test_scan_json_corrected_dates(std_input_path, read_func, spark_tmp_table_factory, input_file): - conf = copy_and_update(_enable_all_types_json_scan_conf, {"spark.sql.legacy.timeParserPolicy": "CORRECTED"}) + conf = copy_and_update(_enable_all_types_json_scan_conf, + {"spark.sql.legacy.timeParserPolicy": "CORRECTED", + "spark.rapids.sql.json.read.datetime.enabled": "true"}) assert_gpu_and_cpu_are_equal_collect( read_func(std_input_path + '/' + input_file, StructType([StructField("data", DateType())]), @@ -1051,6 +1245,7 @@ def test_scan_json_corrected_dates(std_input_path, read_func, spark_tmp_table_fa "boolean_formatted.json", "int_array_formatted.json", "int_struct_formatted.json", + "int_struct_formatted_problematic_rows.json", "int_mixed_array_struct_formatted.json", "bad_whitespace.json", pytest.param("escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/9664')), @@ -1062,7 +1257,9 @@ def test_scan_json_corrected_dates(std_input_path, read_func, spark_tmp_table_fa @allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 def test_from_json_corrected_dates(std_input_path, input_file): schema = StructType([StructField("data", DateType())]) - conf = copy_and_update(_enable_json_to_structs_conf, {"spark.sql.legacy.timeParserPolicy": "CORRECTED"}) + conf = copy_and_update(_enable_json_to_structs_conf, + {"spark.sql.legacy.timeParserPolicy": "CORRECTED", + "spark.rapids.sql.json.read.datetime.enabled": "true"}) assert_gpu_and_cpu_are_equal_collect( lambda spark : read_json_as_text(spark, 
std_input_path + '/' + input_file, "json").select(f.col('json'), f.from_json(f.col('json'), schema)), conf = conf) @@ -1079,6 +1276,7 @@ def test_from_json_corrected_dates(std_input_path, input_file): pytest.param("boolean_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10535')), "int_array_formatted.json", "int_struct_formatted.json", + "int_struct_formatted_problematic_rows.json", "int_mixed_array_struct_formatted.json", "bad_whitespace.json", "escaped_strings.json", @@ -1091,7 +1289,9 @@ def test_from_json_corrected_dates(std_input_path, input_file): @pytest.mark.parametrize('read_func', [read_json_df]) @allow_non_gpu(*non_utc_allow) def test_scan_json_corrected_timestamps(std_input_path, read_func, spark_tmp_table_factory, input_file): - conf = copy_and_update(_enable_all_types_json_scan_conf, {"spark.sql.legacy.timeParserPolicy": "CORRECTED"}) + conf = copy_and_update(_enable_all_types_json_scan_conf, + {"spark.sql.legacy.timeParserPolicy": "CORRECTED", + "spark.rapids.sql.json.read.datetime.enabled": "true"}) assert_gpu_and_cpu_are_equal_collect( read_func(std_input_path + '/' + input_file, StructType([StructField("data", TimestampType())]), @@ -1110,6 +1310,7 @@ def test_scan_json_corrected_timestamps(std_input_path, read_func, spark_tmp_tab pytest.param("boolean_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10535')), "int_array_formatted.json", "int_struct_formatted.json", + "int_struct_formatted_problematic_rows.json", "int_mixed_array_struct_formatted.json", "bad_whitespace.json", "escaped_strings.json", @@ -1121,7 +1322,9 @@ def test_scan_json_corrected_timestamps(std_input_path, read_func, spark_tmp_tab @allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) def test_from_json_corrected_timestamps(std_input_path, input_file): schema = StructType([StructField("data", TimestampType())]) - conf = copy_and_update(_enable_json_to_structs_conf, {"spark.sql.legacy.timeParserPolicy": "CORRECTED"}) + conf = copy_and_update(_enable_json_to_structs_conf, + {"spark.sql.legacy.timeParserPolicy": "CORRECTED", + "spark.rapids.sql.json.read.datetime.enabled": "true"}) assert_gpu_and_cpu_are_equal_collect( lambda spark : read_json_as_text(spark, std_input_path + '/' + input_file, "json").select(f.col('json'), f.from_json(f.col('json'), schema)), conf = conf) @@ -1138,6 +1341,7 @@ def test_from_json_corrected_timestamps(std_input_path, input_file): "boolean_formatted.json", pytest.param("int_array_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10573')), "int_struct_formatted.json", + "int_struct_formatted_problematic_rows.json", pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11491')), "bad_whitespace.json", "escaped_strings.json", @@ -1167,6 +1371,7 @@ def test_scan_json_long_arrays(std_input_path, read_func, spark_tmp_table_factor "boolean_formatted.json", pytest.param("int_array_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10573')), "int_struct_formatted.json", + "int_struct_formatted_problematic_rows.json", pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11491')), "bad_whitespace.json", "escaped_strings.json", @@ -1194,6 +1399,7 @@ def test_from_json_long_arrays(std_input_path, input_file): "boolean_formatted.json", 
pytest.param("int_array_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10574')), "int_struct_formatted.json", + "int_struct_formatted_problematic_rows.json", "int_mixed_array_struct_formatted.json", "bad_whitespace.json", "escaped_strings.json", @@ -1223,6 +1429,7 @@ def test_scan_json_string_arrays(std_input_path, read_func, spark_tmp_table_fact "boolean_formatted.json", pytest.param("int_array_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10574')), "int_struct_formatted.json", + "int_struct_formatted_problematic_rows.json", "int_mixed_array_struct_formatted.json", "bad_whitespace.json", "escaped_strings.json", @@ -1249,7 +1456,11 @@ def test_from_json_string_arrays(std_input_path, input_file): "single_quoted_strings.json", "boolean_formatted.json", "int_array_formatted.json", - pytest.param("int_struct_formatted.json", marks=pytest.mark.xfail(condition=is_before_spark_342(),reason='https://github.com/NVIDIA/spark-rapids/issues/10588')), + "int_struct_formatted.json", + pytest.param("int_struct_formatted_problematic_rows.json", marks=pytest.mark.xfail( + condition=is_before_spark_342() or is_databricks_version_or_later(14, 3), + reason='Before Spark 3.4.2? https://github.com/NVIDIA/spark-rapids/issues/10588. ' + 'Databricks 14.3 or later? https://github.com/NVIDIA/spark-rapids/issues/11711.')), pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(condition=is_before_spark_342(),reason='https://github.com/NVIDIA/spark-rapids/issues/10588')), "bad_whitespace.json", "escaped_strings.json", @@ -1278,7 +1489,11 @@ def test_scan_json_long_structs(std_input_path, read_func, spark_tmp_table_facto "single_quoted_strings.json", "boolean_formatted.json", "int_array_formatted.json", - pytest.param("int_struct_formatted.json", marks=pytest.mark.xfail(condition=is_before_spark_342(),reason='https://github.com/NVIDIA/spark-rapids/issues/10588')), + "int_struct_formatted.json", + pytest.param("int_struct_formatted_problematic_rows.json", marks=pytest.mark.xfail( + condition=is_before_spark_342() or is_databricks_version_or_later(14, 3), + reason='Before Spark 3.4.2? https://github.com/NVIDIA/spark-rapids/issues/10588. ' + 'Databricks 14.3 or later? 
https://github.com/NVIDIA/spark-rapids/issues/11711.')), pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(condition=is_before_spark_342(),reason='https://github.com/NVIDIA/spark-rapids/issues/10588')), "bad_whitespace.json", "escaped_strings.json", @@ -1306,6 +1521,7 @@ def test_from_json_long_structs(std_input_path, input_file): "boolean_formatted.json", "int_array_formatted.json", "int_struct_formatted.json", + "int_struct_formatted_problematic_rows.json", "int_mixed_array_struct_formatted.json", "bad_whitespace.json", "escaped_strings.json", @@ -1335,6 +1551,7 @@ def test_scan_json_string_structs(std_input_path, read_func, spark_tmp_table_fac "boolean_formatted.json", "int_array_formatted.json", "int_struct_formatted.json", + "int_struct_formatted_problematic_rows.json", "int_mixed_array_struct_formatted.json", "bad_whitespace.json", "escaped_strings.json", @@ -1363,6 +1580,7 @@ def test_from_json_string_structs(std_input_path, input_file): "boolean_formatted.json", pytest.param("int_array_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10573')), # This does not fail on 38,0 "int_struct_formatted.json", + "int_struct_formatted_problematic_rows.json", pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11491')), "bad_whitespace.json", "escaped_strings.json", @@ -1392,6 +1610,7 @@ def test_scan_json_dec_arrays(std_input_path, read_func, spark_tmp_table_factory "boolean_formatted.json", pytest.param("int_array_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10573')), # This does not fail on 38,0 "int_struct_formatted.json", + "int_struct_formatted_problematic_rows.json", pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11491')), "bad_whitespace.json", "escaped_strings.json", @@ -1419,6 +1638,7 @@ def test_from_json_dec_arrays(std_input_path, input_file, dt): "boolean_formatted.json", "int_array_formatted.json", "int_struct_formatted.json", + "int_struct_formatted_problematic_rows.json", "int_mixed_array_struct_formatted.json", "bad_whitespace.json", "escaped_strings.json", @@ -1445,10 +1665,13 @@ def test_scan_json_mixed_struct(std_input_path, read_func, spark_tmp_table_facto pytest.param("mixed_objects.json", "data STRUCT>>", marks=pytest.mark.xfail(condition=is_before_spark_330(), reason='https://github.com/NVIDIA/spark-rapids/issues/11390')), ("mixed_objects.json", "company STRUCT>>>>"), + ("mixed_objects.json", "MAP") ]) @allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 def test_from_json_mixed_corrected(std_input_path, input_file, schema): - conf = copy_and_update(_enable_json_to_structs_conf, {"spark.sql.legacy.timeParserPolicy": "CORRECTED"}) + conf = copy_and_update(_enable_json_to_structs_conf, + {"spark.sql.legacy.timeParserPolicy": "CORRECTED", + "spark.rapids.sql.json.read.datetime.enabled": "true"}) assert_gpu_and_cpu_are_equal_collect( lambda spark : read_json_as_text(spark, std_input_path + '/' + input_file, "json").selectExpr('json', "from_json(json, '" + schema + "') as parsed"), diff --git a/integration_tests/src/main/python/json_test.py b/integration_tests/src/main/python/json_test.py index fe1d9064933..6e8165846e7 100644 --- a/integration_tests/src/main/python/json_test.py +++ b/integration_tests/src/main/python/json_test.py @@ -200,7 
+200,9 @@ def test_json_date_formats_round_trip(spark_tmp_path, date_format, v1_enabled_li gen = StructGen([('a', DateGen())], nullable=False) data_path = spark_tmp_path + '/JSON_DATA' schema = gen.data_type - updated_conf = copy_and_update(_enable_all_types_conf, {'spark.sql.sources.useV1SourceList': v1_enabled_list}) + updated_conf = copy_and_update(_enable_all_types_conf, + {'spark.sql.sources.useV1SourceList': v1_enabled_list, + 'spark.rapids.sql.json.read.datetime.enabled': 'true'}) def create_test_data(spark): write = gen_df(spark, gen).write @@ -238,7 +240,9 @@ def create_test_data(spark): write.json(data_path) with_cpu_session(lambda spark: create_test_data(spark)) - updated_conf = copy_and_update(_enable_all_types_conf, {'spark.sql.sources.useV1SourceList': v1_enabled_list}) + updated_conf = copy_and_update(_enable_all_types_conf, + {'spark.sql.sources.useV1SourceList': v1_enabled_list, + 'spark.rapids.sql.json.read.datetime.enabled': 'true'}) def do_read(spark): read = spark.read.schema(schema) @@ -280,7 +284,8 @@ def create_test_data(spark): updated_conf = copy_and_update(_enable_all_types_conf, { 'spark.sql.sources.useV1SourceList': v1_enabled_list, - 'spark.sql.timestampType': timestamp_type + 'spark.sql.timestampType': timestamp_type, + 'spark.rapids.sql.json.read.datetime.enabled': 'true' }) def do_read(spark): @@ -343,7 +348,8 @@ def test_basic_json_read(std_input_path, filename, schema, read_func, allow_non_ allow_numeric_leading_zeros, ansi_enabled, spark_tmp_table_factory, date_format): updated_conf = copy_and_update(_enable_all_types_conf, {'spark.sql.ansi.enabled': ansi_enabled, - 'spark.sql.legacy.timeParserPolicy': 'CORRECTED'}) + 'spark.sql.legacy.timeParserPolicy': 'CORRECTED', + 'spark.rapids.sql.json.read.datetime.enabled': 'true'}) options = {"allowNonNumericNumbers": allow_non_numeric_numbers, "allowNumericLeadingZeros": allow_numeric_leading_zeros, } @@ -390,7 +396,8 @@ def test_basic_from_json(std_input_path, filename, schema, allow_non_numeric_num allow_numeric_leading_zeros, ansi_enabled, date_format): updated_conf = copy_and_update(_enable_all_types_conf, {'spark.sql.ansi.enabled': ansi_enabled, - 'spark.sql.legacy.timeParserPolicy': 'CORRECTED'}) + 'spark.sql.legacy.timeParserPolicy': 'CORRECTED', + 'spark.rapids.sql.json.read.datetime.enabled': 'true'}) options = {"allowNonNumericNumbers": allow_non_numeric_numbers, "allowNumericLeadingZeros": allow_numeric_leading_zeros, } @@ -511,7 +518,8 @@ def test_read_optional_fields(spark_tmp_table_factory, std_input_path, read_func def test_json_read_valid_dates(std_input_path, filename, schema, read_func, ansi_enabled, time_parser_policy, spark_tmp_table_factory): updated_conf = copy_and_update(_enable_all_types_conf, {'spark.sql.ansi.enabled': ansi_enabled, - 'spark.sql.legacy.timeParserPolicy': time_parser_policy}) + 'spark.sql.legacy.timeParserPolicy': time_parser_policy, + 'spark.rapids.sql.json.read.datetime.enabled': 'true'}) f = read_func(std_input_path + '/' + filename, schema, spark_tmp_table_factory, {}) if time_parser_policy == 'LEGACY' and ansi_enabled == 'true': assert_gpu_fallback_collect( @@ -546,7 +554,8 @@ def test_json_read_generated_dates(spark_tmp_table_factory, spark_tmp_path, date updated_conf = copy_and_update(_enable_all_types_conf, { 'spark.sql.ansi.enabled': ansi_enabled, - 'spark.sql.legacy.timeParserPolicy': 'CORRECTED'}) + 'spark.sql.legacy.timeParserPolicy': 'CORRECTED', + 'spark.rapids.sql.json.read.datetime.enabled': 'true'}) options = { 'allowNumericLeadingZeros': 
allow_numeric_leading_zeros } if date_format: @@ -572,7 +581,8 @@ def test_json_read_invalid_dates(std_input_path, filename, schema, read_func, an time_parser_policy, spark_tmp_table_factory): updated_conf = copy_and_update(_enable_all_types_conf, {'spark.sql.ansi.enabled': ansi_enabled, - 'spark.sql.legacy.timeParserPolicy': time_parser_policy }) + 'spark.sql.legacy.timeParserPolicy': time_parser_policy, + 'spark.rapids.sql.json.read.datetime.enabled': 'true'}) options = { 'dateFormat': date_format } if date_format else {} f = read_func(std_input_path + '/' + filename, schema, spark_tmp_table_factory, options) if time_parser_policy == 'EXCEPTION': @@ -605,7 +615,8 @@ def test_json_read_valid_timestamps(std_input_path, filename, schema, read_func, spark_tmp_table_factory): updated_conf = copy_and_update(_enable_all_types_conf, {'spark.sql.ansi.enabled': ansi_enabled, - 'spark.sql.legacy.timeParserPolicy': time_parser_policy}) + 'spark.sql.legacy.timeParserPolicy': time_parser_policy, + 'spark.rapids.sql.json.read.datetime.enabled': 'true'}) f = read_func(std_input_path + '/' + filename, schema, spark_tmp_table_factory, {}) assert_gpu_and_cpu_are_equal_collect(f, conf=updated_conf) @@ -668,6 +679,53 @@ def test_from_json_map(): .select(f.from_json(f.col('a'), 'MAP')), conf=_enable_all_types_conf) +@allow_non_gpu(*non_utc_allow) +def test_from_json_map_with_invalid(): + # The test here is working around some inconsistencies in how the keys are parsed for maps + # on the GPU the keys are dense, but on the CPU they are sparse + json_string_gen = StringGen(r'{"a": "[0-9]{0,5}"(, "b": "[A-Z]{0,5}")?}') \ + .with_special_pattern('', weight=50) \ + .with_special_pattern(' ', weight=50) \ + .with_special_pattern('null', weight=50) \ + .with_special_pattern('invalid', weight=50) \ + .with_special_pattern(r'{"a": "[0-9]{0,5}"', weight=50) \ + .with_special_pattern(r'{"a": "[0-9]{0,5}', weight=50) \ + .with_special_pattern(r'{"a": "[0-9]{0,5}"}abc', weight=50) \ + .with_special_pattern(r'{"a": "[0-9]{0,5}"}{"b": "B"}', weight=50) + assert_gpu_and_cpu_are_equal_collect( + lambda spark : unary_op_df(spark, json_string_gen) \ + .select(f.from_json(f.col('a'), 'MAP')), + conf=_enable_all_types_conf) + +@allow_non_gpu(*non_utc_allow) +@pytest.mark.parametrize('allow_single_quotes', ['true', 'false']) +@pytest.mark.parametrize('allow_non_numeric_numbers', ['true', 'false']) +@pytest.mark.parametrize('allow_unquoted_chars', ['true', 'false']) +def test_from_json_map_with_options(allow_single_quotes, + allow_non_numeric_numbers, + allow_unquoted_chars): + # Test the input with: + # - Double quotes + # - Single quotes + # - Numbers with leading zeros + # - Non-numeric numbers + # - Unquoted control characters in quoted strings + json_string_gen = StringGen(r'{"a": "[0-9]{0,5}"}') \ + .with_special_pattern(r"""{'a': "[0-9]{0,5}"}""", weight=50) \ + .with_special_pattern(r'{"a": 0[0-9]{0,5}}', weight=50) \ + .with_special_pattern(r'{"a": [+-]?(INF|Infinity|NaN)}', weight=50) \ + .with_special_pattern(r'{"(a|a\r\n\tb)": "(xyz|01\r\n\t23)"}', weight=50) + options = {"allowSingleQuotes": allow_single_quotes, + # Cannot test `allowNumericLeadingZeros==true` because the GPU output always has + # leading zeros while the CPU output does not, thus test will always fail. 
+ "allowNumericLeadingZeros": "false", + "allowNonNumericNumbers": allow_non_numeric_numbers, + "allowUnquotedControlChars": allow_unquoted_chars} + assert_gpu_and_cpu_are_equal_collect( + lambda spark : unary_op_df(spark, json_string_gen, length=20) \ + .select(f.from_json(f.col('a'), 'MAP', options)), + conf=_enable_all_types_conf) + @allow_non_gpu('ProjectExec', 'JsonToStructs') def test_from_json_map_fallback(): # The test here is working around some inconsistencies in how the keys are parsed for maps @@ -694,8 +752,7 @@ def test_from_json_map_fallback(): ]) @allow_non_gpu(*non_utc_allow) def test_from_json_struct(schema): - # note that column 'a' does not use leading zeroes due to https://github.com/NVIDIA/spark-rapids/issues/10534 - json_string_gen = StringGen(r'{\'a\': [1-9]{0,5}, "b": \'[A-Z]{0,5}\', "c": 1\d\d\d}') \ + json_string_gen = StringGen(r'{\'a\': [0-9]{0,5}, "b": \'[A-Z]{0,5}\', "c": 1\d\d\d}') \ .with_special_pattern('', weight=50) \ .with_special_pattern('null', weight=50) assert_gpu_and_cpu_are_equal_collect( @@ -708,8 +765,7 @@ def test_from_json_struct(schema): ]) @allow_non_gpu("ProjectExec") def test_from_json_struct_fallback_dupe_keys(schema): - # note that column 'a' does not use leading zeroes due to https://github.com/NVIDIA/spark-rapids/issues/10534 - json_string_gen = StringGen(r'{\'a\': [1-9]{0,5}, "b": \'[A-Z]{0,5}\', "c": 1\d\d\d}') \ + json_string_gen = StringGen(r'{\'a\': [0-9]{0,5}, "b": \'[A-Z]{0,5}\', "c": 1\d\d\d}') \ .with_special_pattern('', weight=50) \ .with_special_pattern('null', weight=50) assert_gpu_fallback_collect( @@ -938,6 +994,36 @@ def test_from_json_struct_of_list(schema): .select(f.from_json('a', schema)), conf=_enable_all_types_conf) +@allow_non_gpu(*non_utc_allow) +def test_from_json_struct_of_list_with_mismatched_schema(): + json_string_gen = StringGen(r'{"teacher": "[A-Z]{1}[a-z]{2,5}",' \ + r'"student": \["[A-Z]{1}[a-z]{2,5}"\]}') \ + .with_special_pattern('', weight=50) \ + .with_special_pattern('null', weight=50) + assert_gpu_and_cpu_are_equal_collect( + lambda spark : unary_op_df(spark, json_string_gen) \ + .select(f.from_json('a', 'struct>>')), + conf=_enable_all_types_conf) + +@pytest.mark.parametrize('schema', ['struct', + 'struct>>', + 'struct>>']) +@allow_non_gpu(*non_utc_allow) +@pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/17349') +def test_from_json_struct_of_list_with_mixed_nested_types_input(schema): + json_string_gen = StringGen(r'{"teacher": "[A-Z]{1}[a-z]{2,5}",' \ + r'"student": \[{"name": "[A-Z]{1}[a-z]{2,5}", "class": "junior"},' \ + r'{"name": "[A-Z]{1}[a-z]{2,5}", "class": "freshman"}\]}') \ + .with_special_pattern('', weight=50) \ + .with_special_pattern('null', weight=50) \ + .with_special_pattern('{"student": \["[A-Z]{1}[a-z]{2,5}"\]}', weight=100) \ + .with_special_pattern('{"student": \[[1-9]{1,5}\]}', weight=100) \ + .with_special_pattern('{"student": {"[A-Z]{1}[a-z]{2,5}": "[A-Z]{1}[a-z]{2,5}"}}', weight=100) + assert_gpu_and_cpu_are_equal_collect( + lambda spark : unary_op_df(spark, json_string_gen) \ + .select(f.from_json('a', schema)), + conf=_enable_all_types_conf) + @pytest.mark.parametrize('schema', [ 'struct' ]) @@ -1004,7 +1090,7 @@ def test_read_case_col_name(spark_tmp_path, v1_enabled_list, col_name): conf=all_confs) -@pytest.mark.parametrize('data_gen', [byte_gen, +_to_json_datagens=[byte_gen, boolean_gen, short_gen, int_gen, @@ -1024,36 +1110,84 @@ def test_read_case_col_name(spark_tmp_path, v1_enabled_list, col_name): .with_special_case('\\\'a\\\''), 
pytest.param(StringGen('\u001a', nullable=True), marks=pytest.mark.xfail( reason='https://github.com/NVIDIA/spark-rapids/issues/9705')) -], ids=idfn) +] + +@pytest.mark.parametrize('data_gen', _to_json_datagens, ids=idfn) @pytest.mark.parametrize('ignore_null_fields', [True, False]) -@pytest.mark.parametrize('pretty', [ - pytest.param(True, marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/9517')), - False -]) @pytest.mark.parametrize('timezone', [ 'UTC', - 'Etc/UTC', - pytest.param('UTC+07:00', marks=pytest.mark.allow_non_gpu('ProjectExec')), + 'Etc/UTC' ]) -@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653') -def test_structs_to_json(spark_tmp_path, data_gen, ignore_null_fields, pretty, timezone): +@allow_non_gpu(*non_utc_project_allow) +def test_structs_to_json(spark_tmp_path, data_gen, ignore_null_fields, timezone): struct_gen = StructGen([ ('a', data_gen), ("b", StructGen([('child', data_gen)], nullable=True)), ("c", ArrayGen(StructGen([('child', data_gen)], nullable=True))), - ("d", MapGen(LongGen(nullable=False), data_gen)), ("d", MapGen(StringGen('[A-Za-z0-9]{0,10}', nullable=False), data_gen)), - ("e", ArrayGen(MapGen(LongGen(nullable=False), data_gen), nullable=True)), + ("e", ArrayGen(MapGen(StringGen('[A-Z]{5}', nullable=False), data_gen), nullable=True)), ], nullable=False) gen = StructGen([('my_struct', struct_gen)], nullable=False) options = { 'ignoreNullFields': ignore_null_fields, - 'pretty': pretty, 'timeZone': timezone} def struct_to_json(spark): df = gen_df(spark, gen) - return df.withColumn("my_json", f.to_json("my_struct", options)).drop("my_struct") + return df.select( + f.to_json("my_struct", options).alias("ms")) + + conf = copy_and_update(_enable_all_types_conf, + { 'spark.rapids.sql.expression.StructsToJson': True }) + + assert_gpu_and_cpu_are_equal_collect( + lambda spark : struct_to_json(spark), + conf=conf) + +@pytest.mark.parametrize('data_gen', _to_json_datagens, ids=idfn) +@pytest.mark.parametrize('ignore_null_fields', [True, False]) +@pytest.mark.parametrize('timezone', [ + 'UTC', + 'Etc/UTC' +]) +@allow_non_gpu(*non_utc_project_allow) +def test_arrays_to_json(spark_tmp_path, data_gen, ignore_null_fields, timezone): + array_gen = ArrayGen(data_gen, nullable=True) + gen = StructGen([("my_array", array_gen)], nullable=False) + + options = { 'ignoreNullFields': ignore_null_fields, + 'timeZone': timezone} + + def struct_to_json(spark): + df = gen_df(spark, gen) + return df.select( + f.to_json("my_array", options).alias("ma")) + + conf = copy_and_update(_enable_all_types_conf, + { 'spark.rapids.sql.expression.StructsToJson': True }) + + assert_gpu_and_cpu_are_equal_collect( + lambda spark : struct_to_json(spark), + conf=conf) + +@pytest.mark.parametrize('data_gen', _to_json_datagens, ids=idfn) +@pytest.mark.parametrize('ignore_null_fields', [True, False]) +@pytest.mark.parametrize('timezone', [ + 'UTC', + 'Etc/UTC' +]) +@allow_non_gpu(*non_utc_project_allow) +def test_maps_to_json(spark_tmp_path, data_gen, ignore_null_fields, timezone): + map_gen = MapGen(StringGen('[A-Z]{1,10}', nullable=False), data_gen, nullable=True) + gen = StructGen([("my_map", map_gen)], nullable=False) + + options = { 'ignoreNullFields': ignore_null_fields, + 'timeZone': timezone} + + def struct_to_json(spark): + df = gen_df(spark, gen) + return df.select( + f.to_json("my_map", options).alias("mm")) conf = copy_and_update(_enable_all_types_conf, { 
'spark.rapids.sql.expression.StructsToJson': True }) @@ -1064,16 +1198,13 @@ def struct_to_json(spark): @pytest.mark.parametrize('data_gen', [timestamp_gen], ids=idfn) @pytest.mark.parametrize('timestamp_format', [ - 'yyyy-MM-dd\'T\'HH:mm:ss[.SSS][XXX]', - pytest.param('yyyy-MM-dd\'T\'HH:mm:ss.SSSXXX', marks=pytest.mark.allow_non_gpu('ProjectExec')), - pytest.param('dd/MM/yyyy\'T\'HH:mm:ss[.SSS][XXX]', marks=pytest.mark.allow_non_gpu('ProjectExec')), + 'yyyy-MM-dd\'T\'HH:mm:ss[.SSS][XXX]' ]) @pytest.mark.parametrize('timezone', [ 'UTC', - 'Etc/UTC', - pytest.param('UTC+07:00', marks=pytest.mark.allow_non_gpu('ProjectExec')), + 'Etc/UTC' ]) -@pytest.mark.skipif(is_not_utc(), reason='Duplicated as original test case designed which it is parameterized by timezone. https://github.com/NVIDIA/spark-rapids/issues/9653.') +@allow_non_gpu(*non_utc_project_allow) def test_structs_to_json_timestamp(spark_tmp_path, data_gen, timestamp_format, timezone): struct_gen = StructGen([ ("b", StructGen([('child', data_gen)], nullable=True)), @@ -1202,6 +1333,29 @@ def struct_to_json(spark): conf=conf) +@allow_non_gpu('ProjectExec') +def test_structs_to_json_fallback_pretty(spark_tmp_path): + struct_gen = StructGen([ + ('a', long_gen), + ("b", byte_gen), + ("c", ArrayGen(short_gen)) + ], nullable=False) + gen = StructGen([('my_struct', struct_gen)], nullable=False) + + options = { 'pretty': True } + + def struct_to_json(spark): + df = gen_df(spark, gen) + return df.withColumn("my_json", f.to_json("my_struct", options)).drop("my_struct") + + conf = copy_and_update(_enable_all_types_conf, + { 'spark.rapids.sql.expression.StructsToJson': True }) + + assert_gpu_fallback_collect( + lambda spark : struct_to_json(spark), + 'ProjectExec', + conf=conf) + ##################################################### # Some from_json tests ported over from Apache Spark ##################################################### @@ -1230,7 +1384,6 @@ def test_spark_from_json(): # from_json - input=empty array, schema=struct, output=single row with null # from_json - input=empty object, schema=struct, output=single row with null # SPARK-19543: from_json empty input column -@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10483') @pytest.mark.parametrize('data', [ [[r'''[]''']], [[r'''{ }''']], @@ -1300,7 +1453,6 @@ def test_spark_from_json_single_item_array_to_struct(): lambda spark : spark.createDataFrame(data, 'json STRING').select(f.col('json'), f.from_json(f.col('json'), schema)), conf =_enable_all_types_conf) -@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10484') #from_json - input=array, schema=struct, output=single row @allow_non_gpu('ProjectExec') def test_spark_from_json_struct_with_corrupted_row(): @@ -1375,9 +1527,10 @@ def test_spark_from_json_timestamp_format_option_zoneid_but_default_format(zone_ schema = StructType([StructField("t", TimestampType())]) data = [[r'''{"t": "2016-01-01 00:00:00"}'''], [r'''{"t": "2023-07-27 12:21:05"}''']] + conf = copy_and_update(_enable_all_types_conf, {"spark.rapids.sql.json.read.datetime.enabled": "true"}) assert_gpu_and_cpu_are_equal_collect( lambda spark : spark.createDataFrame(data, 'json STRING').select(f.col('json'), f.from_json(f.col('json'), schema, {'timeZone': zone_id})), - conf =_enable_all_types_conf) + conf =conf) # from_json with option (timestampFormat) # no timestamp format appears to actually work @@ -1391,7 +1544,6 @@ def test_spark_from_json_timestamp_format(): conf =_enable_all_types_conf) # from_json missing fields 
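# The hunks above repeatedly layer an opt-in flag for GPU JSON date/time parsing on top of an
# existing conf dict before reading DateType or TimestampType columns. A minimal sketch of that
# pattern; copy_and_update and _enable_all_types_conf are assumed to come from the surrounding
# test modules, and the flag name is the one used in the hunks above.
_datetime_read_conf = copy_and_update(_enable_all_types_conf, {
    "spark.sql.legacy.timeParserPolicy": "CORRECTED",        # match the CPU parser rules
    "spark.rapids.sql.json.read.datetime.enabled": "true"})  # allow date/timestamp reads on the GPU
# The merged dict is then passed as conf= to the assert_gpu_and_cpu_* helpers, as in the tests above.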
-@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10489') @allow_non_gpu(*non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 def test_spark_from_json_missing_fields_with_cr(): schema = StructType([StructField("a", LongType(), False), StructField("b", StringType(), False), StructField("c", StringType(), False)]) @@ -1432,9 +1584,10 @@ def test_spark_from_json_date_with_format(): data = [["""{"time": "26/08/2015"}"""], ["""{"time": "01/01/2024"}"""]] schema = StructType([StructField("d", DateType())]) + conf = copy_and_update(_enable_all_types_conf, {"spark.rapids.sql.json.read.datetime.enabled": "true"}) assert_gpu_and_cpu_are_equal_collect( lambda spark : spark.createDataFrame(data, 'json STRING').select(f.col('json'), f.from_json(f.col('json'), schema, {'dateFormat': 'dd/MM/yyyy'})), - conf =_enable_all_types_conf) + conf =conf) # TEST from_json missing columns @allow_non_gpu(*non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 @@ -1446,7 +1599,6 @@ def test_spark_from_json_missing_columns(): conf =_enable_all_types_conf) # TEST from_json invalid json -@pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10483') @allow_non_gpu(*non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 def test_spark_from_json_invalid_json(): schema = StructType([StructField("a", IntegerType())]) @@ -1454,3 +1606,11 @@ def test_spark_from_json_invalid_json(): assert_gpu_and_cpu_are_equal_collect( lambda spark : spark.createDataFrame(data, 'json STRING').select(f.col('json'), f.from_json(f.col('json'), schema)), conf =_enable_all_types_conf) + +@allow_non_gpu(*non_utc_allow) +def test_from_json_input_wrapped_in_whitespaces(): + json_string_gen = StringGen(r'[ \r\n\t]{0,5}({"key":( |\r|\n|\t|)"[A-Za-z]{0,5}"}|null|invalid|)[ \r\n\t]{0,5}') + assert_gpu_and_cpu_are_equal_collect( + lambda spark : unary_op_df(spark, json_string_gen) \ + .select(f.from_json('a', 'struct')), + conf=_enable_all_types_conf) diff --git a/integration_tests/src/main/python/misc_expr_test.py b/integration_tests/src/main/python/misc_expr_test.py index 3251df08d27..0895d451b9d 100644 --- a/integration_tests/src/main/python/misc_expr_test.py +++ b/integration_tests/src/main/python/misc_expr_test.py @@ -19,7 +19,7 @@ from marks import incompat, approximate_float from pyspark.sql.types import * import pyspark.sql.functions as f -from spark_session import is_before_spark_400 +from spark_session import is_databricks_version_or_later, is_spark_400_or_later def test_mono_id(): assert_gpu_and_cpu_are_equal_collect( @@ -34,8 +34,8 @@ def test_part_id(): f.spark_partition_id())) -@pytest.mark.skipif(condition=not is_before_spark_400(), - reason="raise_error() not currently implemented for Spark 4.0. " +@pytest.mark.skipif(condition=is_spark_400_or_later() or is_databricks_version_or_later(14, 3), + reason="raise_error() not currently implemented for Spark 4.0, or Databricks 14.3. " "See https://github.com/NVIDIA/spark-rapids/issues/10107.") def test_raise_error(): data_gen = ShortGen(nullable=False, min_val=0, max_val=20, special_cases=[]) diff --git a/integration_tests/src/main/python/orc_test.py b/integration_tests/src/main/python/orc_test.py index 789a261b52b..19894d29aa6 100644 --- a/integration_tests/src/main/python/orc_test.py +++ b/integration_tests/src/main/python/orc_test.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -112,8 +112,11 @@ def test_basic_read(std_input_path, name, read_func, v1_enabled_list, orc_impl, #E at org.apache.orc.TypeDescription.parseInt(TypeDescription.java:244) #E at org.apache.orc.TypeDescription.parseType(TypeDescription.java:362) # ... +# Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and +# https://github.com/rapidsai/cudf/issues/6763 . +# Once the first issue is fixed, add back boolean_gen orc_basic_gens = [byte_gen, short_gen, int_gen, long_gen, float_gen, double_gen, - string_gen, boolean_gen, DateGen(start=date(1590, 1, 1)), + string_gen, DateGen(start=date(1590, 1, 1)), orc_timestamp_gen] + decimal_gens orc_basic_struct_gen = StructGen([['child'+str(ind), sub_gen] for ind, sub_gen in enumerate(orc_basic_gens)]) @@ -201,8 +204,11 @@ def test_read_round_trip(spark_tmp_path, orc_gens, read_func, reader_confs, v1_e read_func(data_path), conf=all_confs) +# Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and +# https://github.com/rapidsai/cudf/issues/6763 . +# Once the first issue is fixed, add back boolean_gen orc_pred_push_gens = [ - byte_gen, short_gen, int_gen, long_gen, float_gen, double_gen, boolean_gen, + byte_gen, short_gen, int_gen, long_gen, float_gen, double_gen, string_gen, # Once https://github.com/NVIDIA/spark-rapids/issues/139 is fixed replace this with # date_gen @@ -277,8 +283,11 @@ def test_compress_read_round_trip(spark_tmp_path, compress, v1_enabled_list, rea def test_simple_partitioned_read(spark_tmp_path, v1_enabled_list, reader_confs): # Once https://github.com/NVIDIA/spark-rapids/issues/131 is fixed # we should go with a more standard set of generators + # Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and + # https://github.com/rapidsai/cudf/issues/6763 . + # Once the first issue is fixed, add back boolean_gen orc_gens = [byte_gen, short_gen, int_gen, long_gen, float_gen, double_gen, - string_gen, boolean_gen, DateGen(start=date(1590, 1, 1)), + string_gen, DateGen(start=date(1590, 1, 1)), orc_timestamp_gen] gen_list = [('_c' + str(i), gen) for i, gen in enumerate(orc_gens)] first_data_path = spark_tmp_path + '/ORC_DATA/key=0/key2=20' @@ -344,8 +353,11 @@ def test_partitioned_read_just_partitions(spark_tmp_path, v1_enabled_list, reade def test_merge_schema_read(spark_tmp_path, v1_enabled_list, reader_confs): # Once https://github.com/NVIDIA/spark-rapids/issues/131 is fixed # we should go with a more standard set of generators + # Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and + # https://github.com/rapidsai/cudf/issues/6763 . 
+ # Once the first issue is fixed, add back boolean_gen orc_gens = [byte_gen, short_gen, int_gen, long_gen, float_gen, double_gen, - string_gen, boolean_gen, DateGen(start=date(1590, 1, 1)), + string_gen, DateGen(start=date(1590, 1, 1)), orc_timestamp_gen] first_gen_list = [('_c' + str(i), gen) for i, gen in enumerate(orc_gens)] first_data_path = spark_tmp_path + '/ORC_DATA/key=0' @@ -825,8 +837,11 @@ def test_read_round_trip_for_multithreaded_combining(spark_tmp_path, gens, keep_ @pytest.mark.parametrize('keep_order', [True, pytest.param(False, marks=pytest.mark.ignore_order(local=True))]) @allow_non_gpu(*non_utc_allow_orc_scan) def test_simple_partitioned_read_for_multithreaded_combining(spark_tmp_path, keep_order): + # Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and + # https://github.com/rapidsai/cudf/issues/6763 . + # Once the first issue is fixed, add back boolean_gen orc_gens = [byte_gen, short_gen, int_gen, long_gen, float_gen, double_gen, - string_gen, boolean_gen, DateGen(start=date(1590, 1, 1)), + string_gen, DateGen(start=date(1590, 1, 1)), orc_timestamp_gen] gen_list = [('_c' + str(i), gen) for i, gen in enumerate(orc_gens)] first_data_path = spark_tmp_path + '/ORC_DATA/key=0/key2=20' @@ -845,12 +860,16 @@ def test_simple_partitioned_read_for_multithreaded_combining(spark_tmp_path, kee assert_gpu_and_cpu_are_equal_collect( lambda spark: spark.read.orc(data_path), conf=all_confs) -@pytest.mark.skipif(is_spark_340_or_later() and (not (is_databricks_runtime() and spark_version() == "3.4.1")), reason="https://github.com/NVIDIA/spark-rapids/issues/8324") + +@pytest.mark.skipif(is_spark_340_or_later() and not is_databricks_runtime(), + reason="https://github.com/NVIDIA/spark-rapids/issues/8324") @pytest.mark.parametrize('data_file', ['fixed-length-char-column-from-hive.orc']) @pytest.mark.parametrize('reader', [read_orc_df, read_orc_sql]) def test_read_hive_fixed_length_char(std_input_path, data_file, reader): """ Test that a file containing CHAR data is readable as STRING. + The plugin behaviour matches all Spark versions prior to 3.4.0, + and Databricks version 13.3 (i.e. 3.4.1) and after. """ assert_gpu_and_cpu_are_equal_collect( reader(std_input_path + '/' + data_file), @@ -858,20 +877,30 @@ def test_read_hive_fixed_length_char(std_input_path, data_file, reader): @allow_non_gpu("ProjectExec") -@pytest.mark.skipif(is_before_spark_340() or (is_databricks_runtime() and spark_version() == "3.4.1"), reason="https://github.com/NVIDIA/spark-rapids/issues/8324") +@pytest.mark.skipif(is_before_spark_340(), + reason="https://github.com/NVIDIA/spark-rapids/issues/8324") +@pytest.mark.skipif(is_databricks_version_or_later(13, 3), + reason="The SELECT * query does not involve ProjectExec " + "on Databricks versions >= 13.3. " + "Can't test Project fallback without ProjectExec.") @pytest.mark.parametrize('data_file', ['fixed-length-char-column-from-hive.orc']) @pytest.mark.parametrize('reader', [read_orc_df, read_orc_sql]) def test_project_fallback_when_reading_hive_fixed_length_char(std_input_path, data_file, reader): """ - Test that a file containing CHAR data is readable as STRING. + Test that reading a file containing fixed-width CHAR data (e.g. CHAR(3)) as a STRING column + causes the ProjectExec to fall back to CPU. Note: This test can be removed when https://github.com/NVIDIA/spark-rapids/issues/8324 is resolved. + + This test does not apply to Databricks >= 13.3, because there would be + no ProjectExec to fall back to CPU. 
""" assert_gpu_fallback_collect( reader(std_input_path + '/' + data_file), cpu_fallback_class_name="ProjectExec", conf={}) + @pytest.mark.parametrize('read_func', [read_orc_df, read_orc_sql]) @pytest.mark.parametrize('v1_enabled_list', ["", "orc"]) @pytest.mark.parametrize('orc_impl', ["native", "hive"]) @@ -913,7 +942,10 @@ def test_orc_column_name_with_dots(spark_tmp_path, reader_confs): ("f.g", int_gen), ("h", string_gen)])), ("i.j", long_gen)])), - ("k", boolean_gen)] + # Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and + # https://github.com/rapidsai/cudf/issues/6763 . + # Once the first issue is fixed, add back boolean_gen for column k + ("k", int_gen)] with_cpu_session(lambda spark: gen_df(spark, gens).write.orc(data_path)) assert_gpu_and_cpu_are_equal_collect(lambda spark: reader(spark), conf=all_confs) assert_gpu_and_cpu_are_equal_collect(lambda spark: reader(spark).selectExpr("`a.b`"), conf=all_confs) @@ -931,7 +963,10 @@ def test_orc_with_null_column(spark_tmp_path, reader_confs): def gen_null_df(spark): return spark.createDataFrame( [(None, None, None, None, None)], - "c1 int, c2 long, c3 float, c4 double, c5 boolean") + # Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and + # https://github.com/rapidsai/cudf/issues/6763 . + # Once the first issue is fixed, add back boolean_gen + "c1 int, c2 long, c3 float, c4 double, c5 int") assert_gpu_and_cpu_writes_are_equal_collect( lambda spark, path: gen_null_df(spark).write.orc(path), @@ -952,7 +987,10 @@ def test_orc_with_null_column_with_1m_rows(spark_tmp_path, reader_confs): def gen_null_df(spark): return spark.createDataFrame( data, - "c1 int, c2 long, c3 float, c4 double, c5 boolean") + # Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and + # https://github.com/rapidsai/cudf/issues/6763 . + # Once the first issue is fixed, add back boolean_gen + "c1 int, c2 long, c3 float, c4 double, c5 int") assert_gpu_and_cpu_writes_are_equal_collect( lambda spark, path: gen_null_df(spark).write.orc(path), lambda spark, path: spark.read.orc(path), diff --git a/integration_tests/src/main/python/orc_write_test.py b/integration_tests/src/main/python/orc_write_test.py index f4928196c82..7e415c79a46 100644 --- a/integration_tests/src/main/python/orc_write_test.py +++ b/integration_tests/src/main/python/orc_write_test.py @@ -15,7 +15,7 @@ import pytest from asserts import assert_gpu_and_cpu_writes_are_equal_collect, assert_gpu_fallback_write -from spark_session import is_before_spark_320, is_before_spark_400, is_spark_321cdh, is_spark_cdh, with_cpu_session, with_gpu_session +from spark_session import is_before_spark_320, is_databricks_version_or_later, is_spark_321cdh, is_spark_400_or_later, is_spark_cdh, with_cpu_session, with_gpu_session from conftest import is_not_utc from datetime import date, datetime, timezone from data_gen import * @@ -24,9 +24,11 @@ from pyspark.sql.types import * pytestmark = pytest.mark.nightly_resource_consuming_test - +# Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and +# https://github.com/rapidsai/cudf/issues/6763 . +# Once the first issue is fixed, add back boolean_gen. 
orc_write_basic_gens = [byte_gen, short_gen, int_gen, long_gen, float_gen, double_gen, - string_gen, boolean_gen, DateGen(start=date(1590, 1, 1)), + string_gen, DateGen(start=date(1590, 1, 1)), TimestampGen(start=datetime(1970, 1, 1, tzinfo=timezone.utc)) ] + \ decimal_gens @@ -52,7 +54,8 @@ all_nulls_map_gen, all_empty_map_gen] -orc_write_basic_struct_gen = StructGen([['child'+str(ind), sub_gen] for ind, sub_gen in enumerate(orc_write_basic_gens)]) +orc_write_basic_struct_gen = StructGen( + [['child'+str(ind), sub_gen] for ind, sub_gen in enumerate(orc_write_basic_gens)]) orc_write_struct_gens_sample = [orc_write_basic_struct_gen, StructGen([['child0', byte_gen], ['child1', orc_write_basic_struct_gen]]), @@ -62,15 +65,18 @@ ArrayGen(ArrayGen(short_gen, max_length=10), max_length=10), ArrayGen(ArrayGen(string_gen, max_length=10), max_length=10), ArrayGen(StructGen([['child0', byte_gen], ['child1', string_gen], ['child2', float_gen]]))] - +# Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and +# https://github.com/rapidsai/cudf/issues/6763 . +# Once the first issue is fixed, add back boolean_gen. orc_write_basic_map_gens = [simple_string_to_string_map_gen] + [MapGen(f(nullable=False), f()) for f in [ - BooleanGen, ByteGen, ShortGen, IntegerGen, LongGen, FloatGen, DoubleGen, + ByteGen, ShortGen, IntegerGen, LongGen, FloatGen, DoubleGen, # Using timestamps from 1970 to work around a cudf ORC bug # https://github.com/NVIDIA/spark-rapids/issues/140. lambda nullable=True: TimestampGen(start=datetime(1970, 1, 1, tzinfo=timezone.utc), nullable=nullable), lambda nullable=True: DateGen(start=date(1590, 1, 1), nullable=nullable), lambda nullable=True: DecimalGen(precision=15, scale=1, nullable=nullable), - lambda nullable=True: DecimalGen(precision=36, scale=5, nullable=nullable)]] + lambda nullable=True: DecimalGen(precision=36, scale=5, nullable=nullable)]] + [MapGen( + f(nullable=False), f(nullable=False)) for f in [IntegerGen]] orc_write_gens_list = [orc_write_basic_gens, orc_write_struct_gens_sample, @@ -79,6 +85,7 @@ pytest.param([date_gen], marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/139')), pytest.param([timestamp_gen], marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/140'))] +bool_gen = [BooleanGen(nullable=True), BooleanGen(nullable=False)] @pytest.mark.parametrize('orc_gens', orc_write_gens_list, ids=idfn) @pytest.mark.parametrize('orc_impl', ["native", "hive"]) @allow_non_gpu(*non_utc_allow) @@ -91,6 +98,30 @@ def test_write_round_trip(spark_tmp_path, orc_gens, orc_impl): data_path, conf={'spark.sql.orc.impl': orc_impl, 'spark.rapids.sql.format.orc.write.enabled': True}) +@pytest.mark.parametrize('orc_gens', [bool_gen], ids=idfn) +@pytest.mark.parametrize('orc_impl', ["native", "hive"]) +@allow_non_gpu('ExecutedCommandExec', 'DataWritingCommandExec', 'WriteFilesExec') +def test_write_round_trip_bools_only_fallback(spark_tmp_path, orc_gens, orc_impl): + gen_list = [('_c' + str(i), gen) for i, gen in enumerate(orc_gens)] + data_path = spark_tmp_path + '/ORC_DATA' + assert_gpu_and_cpu_writes_are_equal_collect( + lambda spark, path: gen_df(spark, gen_list).coalesce(1).write.orc(path), + lambda spark, path: spark.read.orc(path), + data_path, + conf={'spark.sql.orc.impl': orc_impl, 'spark.rapids.sql.format.orc.write.enabled': True}) + +@pytest.mark.parametrize('orc_gens', [bool_gen], ids=idfn) +@pytest.mark.parametrize('orc_impl', ["native", "hive"]) +def 
test_write_round_trip_bools_only_no_fallback(spark_tmp_path, orc_gens, orc_impl): + gen_list = [('_c' + str(i), gen) for i, gen in enumerate(orc_gens)] + data_path = spark_tmp_path + '/ORC_DATA' + assert_gpu_and_cpu_writes_are_equal_collect( + lambda spark, path: gen_df(spark, gen_list).coalesce(1).write.orc(path), + lambda spark, path: spark.read.orc(path), + data_path, + conf={'spark.sql.orc.impl': orc_impl, 'spark.rapids.sql.format.orc.write.enabled': True, + 'spark.rapids.sql.format.orc.write.boolType.enabled': True}) + @pytest.mark.parametrize('orc_gen', orc_write_odd_empty_strings_gens_sample, ids=idfn) @pytest.mark.parametrize('orc_impl', ["native", "hive"]) def test_write_round_trip_corner(spark_tmp_path, orc_gen, orc_impl): @@ -103,7 +134,8 @@ def test_write_round_trip_corner(spark_tmp_path, orc_gen, orc_impl): conf={'spark.sql.orc.impl': orc_impl, 'spark.rapids.sql.format.orc.write.enabled': True}) orc_part_write_gens = [ - byte_gen, short_gen, int_gen, long_gen, boolean_gen, + # Add back boolean_gen when https://github.com/rapidsai/cudf/issues/6763 is fixed + byte_gen, short_gen, int_gen, long_gen, # Some file systems have issues with UTF8 strings so to help the test pass even there StringGen('(\\w| ){0,50}'), # Once https://github.com/NVIDIA/spark-rapids/issues/139 is fixed replace this with @@ -345,7 +377,10 @@ def test_orc_write_column_name_with_dots(spark_tmp_path): ("f.g", int_gen), ("h", string_gen)])), ("i.j", long_gen)])), - ("k", boolean_gen)] + # Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and + # https://github.com/rapidsai/cudf/issues/6763 . + # Once the first issue is fixed, add back boolean_gen for column k + ("k", int_gen)] assert_gpu_and_cpu_writes_are_equal_collect( lambda spark, path: gen_df(spark, gens).coalesce(1).write.orc(path), lambda spark, path: spark.read.orc(path), @@ -360,8 +395,8 @@ def test_orc_do_not_lowercase_columns(spark_tmp_path): # The wording of the `is not exists` error message in Spark 4.x is unfortunate, but accurate: # https://github.com/apache/spark/blob/4501285a49e4c0429c9cf2c105f044e1c8a93d21/python/pyspark/errors/error-conditions.json#L487 - expected_error_message = "No StructField named acol" if is_before_spark_400() else \ - "Key `acol` is not exists." + expected_error_message = "Key `acol` is not exists." 
if is_spark_400_or_later() or is_databricks_version_or_later(14, 3) \ + else "No StructField named acol" assert_gpu_and_cpu_writes_are_equal_collect( # column is uppercase lambda spark, path: spark.range(0, 1000).select(col("id").alias("Acol")).write.orc(path), diff --git a/integration_tests/src/main/python/parquet_test.py b/integration_tests/src/main/python/parquet_test.py index f9236f42076..6aa234003ba 100644 --- a/integration_tests/src/main/python/parquet_test.py +++ b/integration_tests/src/main/python/parquet_test.py @@ -299,12 +299,19 @@ def test_parquet_read_round_trip_binary_as_string(std_input_path, read_func, rea @pytest.mark.parametrize('compress', parquet_compress_options) @pytest.mark.parametrize('reader_confs', reader_opt_confs) @pytest.mark.parametrize('v1_enabled_list', ["", "parquet"]) -def test_parquet_compress_read_round_trip(spark_tmp_path, compress, v1_enabled_list, reader_confs): +@pytest.mark.parametrize('cpu_decompress', [True, False]) +def test_parquet_compress_read_round_trip(spark_tmp_path, compress, v1_enabled_list, reader_confs, cpu_decompress): data_path = spark_tmp_path + '/PARQUET_DATA' with_cpu_session( lambda spark : binary_op_df(spark, long_gen).write.parquet(data_path), conf={'spark.sql.parquet.compression.codec': compress}) all_confs = copy_and_update(reader_confs, {'spark.sql.sources.useV1SourceList': v1_enabled_list}) + if cpu_decompress: + all_confs = copy_and_update(all_confs, { + 'spark.rapids.sql.format.parquet.decompressCpu' : 'true', + 'spark.rapids.sql.format.parquet.decompressCpu.snappy' : 'true', + 'spark.rapids.sql.format.parquet.decompressCpu.zstd' : 'true' + }) assert_gpu_and_cpu_are_equal_collect( lambda spark : spark.read.parquet(data_path), conf=all_confs) @@ -517,6 +524,8 @@ def test_parquet_read_buffer_allocation_empty_blocks(spark_tmp_path, v1_enabled_ lambda spark : spark.read.parquet(data_path).filter("id < 2 or id > 990"), conf=all_confs) + +@disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 @pytest.mark.parametrize('reader_confs', reader_opt_confs) @pytest.mark.parametrize('v1_enabled_list', ["", "parquet"]) @pytest.mark.skipif(is_databricks_runtime(), reason="https://github.com/NVIDIA/spark-rapids/issues/7733") @@ -829,6 +838,8 @@ def test_parquet_read_nano_as_longs_true(std_input_path): 'FileSourceScanExec', conf=conf) + +@disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 def test_many_column_project(): def _create_wide_data_frame(spark, num_cols): schema_dict = {} @@ -1317,27 +1328,64 @@ def test_parquet_read_case_insensitivity(spark_tmp_path): ) -# test read INT32 as INT8/INT16/Date -@pytest.mark.parametrize('reader_confs', reader_opt_confs) -@pytest.mark.parametrize('v1_enabled_list', ["", "parquet"]) -def test_parquet_int32_downcast(spark_tmp_path, reader_confs, v1_enabled_list): +def run_test_parquet_int32_downcast(spark_tmp_path, + reader_confs, + v1_enabled_list, + ansi_conf): + """ + This tests whether Parquet files with columns written as INT32 can be + read as having INT8, INT16 and DATE columns, with ANSI mode enabled/disabled. + """ data_path = spark_tmp_path + '/PARQUET_DATA' write_schema = [("d", date_gen), ('s', short_gen), ('b', byte_gen)] + + # For test setup, write with ANSI disabled. + # Otherwise, CAST(d AS INT) will fail on Spark CPU. 
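    # Illustrative note (hedged): ansi_conf only affects the read verification at the
    # end of this helper; the callers below pass it explicitly, e.g.
    #   run_test_parquet_int32_downcast(..., ansi_conf=ansi_disabled_conf)  # ANSI off
    #   run_test_parquet_int32_downcast(..., ansi_conf=ansi_enabled_conf)   # ANSI on
    # while the setup write that follows always runs with ansi_disabled_conf.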
with_cpu_session( lambda spark: gen_df(spark, write_schema).selectExpr( "cast(d as Int) as d", "cast(s as Int) as s", - "cast(b as Int) as b").write.parquet(data_path)) + "cast(b as Int) as b").write.parquet(data_path), conf=ansi_disabled_conf) read_schema = StructType([StructField("d", DateType()), StructField("s", ShortType()), StructField("b", ByteType())]) conf = copy_and_update(reader_confs, - {'spark.sql.sources.useV1SourceList': v1_enabled_list}) + {'spark.sql.sources.useV1SourceList': v1_enabled_list}, + ansi_conf) assert_gpu_and_cpu_are_equal_collect( lambda spark: spark.read.schema(read_schema).parquet(data_path), conf=conf) + +@pytest.mark.parametrize('reader_confs', reader_opt_confs) +@pytest.mark.parametrize('v1_enabled_list', ["", "parquet"]) +def test_parquet_int32_downcast_ansi_disabled(spark_tmp_path, reader_confs, v1_enabled_list): + """ + This tests whether Parquet files with columns written as INT32 can be + read as having INT8, INT16 and DATE columns, with ANSI mode disabled. + """ + run_test_parquet_int32_downcast(spark_tmp_path, + reader_confs, + v1_enabled_list, + ansi_disabled_conf) + + +def test_parquet_int32_downcast_ansi_enabled(spark_tmp_path): + """ + This is the flipside of test_parquet_int32_downcast_ansi_disabled. + This tests whether Parquet files with columns written as INT32 can be + read as having INT8, INT16 and DATE columns, now tested with ANSI + enabled. + A limited combination of test parameters is used to test ANSI enabled, + in the interest of brevity. + """ + run_test_parquet_int32_downcast(spark_tmp_path, + reader_confs=native_parquet_file_reader_conf, + v1_enabled_list="", + ansi_conf=ansi_enabled_conf) + + @pytest.mark.parametrize('reader_confs', reader_opt_confs) @pytest.mark.parametrize('v1_enabled_list', ["", "parquet"]) @pytest.mark.parametrize("types", [("byte", "short"), ("byte", "int"), ("short", "int")], ids=idfn) @@ -1372,6 +1420,10 @@ def test_parquet_nested_column_missing(spark_tmp_path, reader_confs, v1_enabled_ lambda spark: spark.read.schema(read_schema).parquet(data_path), conf=conf) +@pytest.mark.skipif(condition=is_databricks_runtime() and is_databricks_version_or_later(14,3), + reason="https://github.com/NVIDIA/spark-rapids/issues/11512") +@pytest.mark.skipif(condition=is_spark_400_or_later(), + reason="https://github.com/NVIDIA/spark-rapids/issues/11512") def test_parquet_check_schema_compatibility(spark_tmp_path): data_path = spark_tmp_path + '/PARQUET_DATA' gen_list = [('int', int_gen), ('long', long_gen), ('dec32', decimal_gen_32bit)] @@ -1384,13 +1436,6 @@ def test_parquet_check_schema_compatibility(spark_tmp_path): conf={}, error_message='Parquet column cannot be converted') - read_dec32_as_dec64 = StructType( - [StructField('int', IntegerType()), StructField('dec32', DecimalType(15, 10))]) - assert_gpu_and_cpu_error( - lambda spark: spark.read.schema(read_dec32_as_dec64).parquet(data_path).collect(), - conf={}, - error_message='Parquet column cannot be converted') - # For nested types, GPU throws incompatible exception with a different message from CPU.
def test_parquet_check_schema_compatibility_nested_types(spark_tmp_path): @@ -1436,6 +1481,75 @@ def test_parquet_check_schema_compatibility_nested_types(spark_tmp_path): lambda spark: spark.read.schema(read_map_str_str_as_str_int).parquet(data_path).collect()), error_message='Parquet column cannot be converted') + +@pytest.mark.parametrize('from_decimal_gen, to_decimal_gen', [ + # Widening precision and scale by the same amount + (DecimalGen(5, 2), DecimalGen(7, 4)), + (DecimalGen(5, 2), DecimalGen(10, 7)), + (DecimalGen(5, 2), DecimalGen(20, 17)), + (DecimalGen(10, 2), DecimalGen(12, 4)), + (DecimalGen(10, 2), DecimalGen(20, 12)), + (DecimalGen(20, 2), DecimalGen(22, 4)), + # Increasing precision by larger amount than scale + (DecimalGen(5, 2), DecimalGen(6, 3)), + (DecimalGen(5, 2), DecimalGen(12, 5)), + (DecimalGen(5, 2), DecimalGen(22, 10)), + # Narrowing precision and scale + (DecimalGen(7, 4), DecimalGen(5, 2)), + (DecimalGen(10, 7), DecimalGen(5, 2)), + (DecimalGen(20, 17), DecimalGen(5, 2)), + # Increasing precision and decreasing scale + (DecimalGen(5, 4), DecimalGen(7, 2)), + (DecimalGen(10, 6), DecimalGen(12, 4)), + (DecimalGen(20, 7), DecimalGen(22, 5)), + # Increasing precision by a smaller amount than scale + (DecimalGen(5, 2), DecimalGen(6, 4)), + (DecimalGen(10, 4), DecimalGen(12, 7)) +], ids=idfn) +def test_parquet_decimal_precision_scale_change(spark_tmp_path, from_decimal_gen, to_decimal_gen): + """Test decimal precision and scale changes when reading Parquet files with RAPIDS acceleration.""" + data_path = f"{spark_tmp_path}/PARQUET_DECIMAL_DATA" + + # Write test data with CPU + with_cpu_session( + lambda spark: unary_op_df(spark, from_decimal_gen) + .coalesce(1) + .write.parquet(data_path) + ) + + # Create target schema for reading + read_schema = StructType([ + StructField("a", to_decimal_gen.data_type) + ]) + + # Determine if we expect an error based on precision and scale changes + expect_error = ( + to_decimal_gen.scale < from_decimal_gen.scale or + (to_decimal_gen.precision - to_decimal_gen.scale) < + (from_decimal_gen.precision - from_decimal_gen.scale) + ) + + spark_conf = {} + if is_before_spark_400(): + # In Spark versions earlier than 4.0, the vectorized Parquet reader throws an exception + # if the read scale differs from the write scale. We disable the vectorized reader, + # forcing Spark to use the non-vectorized path for CPU case. This configuration + # is ignored by the plugin. 
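        # Worked example of the expect_error rule above (illustration only): reading
        # DECIMAL(5,2) as DECIMAL(7,4) keeps 3 integer digits (7-4 == 5-2) and only
        # widens the scale, so it should succeed; reading DECIMAL(7,4) as DECIMAL(5,2)
        # narrows the scale (2 < 4), so 'Parquet column cannot be converted' is expected.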
+ spark_conf['spark.sql.parquet.enableVectorizedReader'] = 'false' + + if expect_error: + assert_gpu_and_cpu_error( + lambda spark: spark.read.schema(read_schema).parquet(data_path).collect(), + conf={}, + error_message="Parquet column cannot be converted" + ) + else: + assert_gpu_and_cpu_are_equal_collect( + lambda spark: spark.read.schema(read_schema).parquet(data_path), + conf=spark_conf + ) + + @pytest.mark.skipif(is_before_spark_320() or is_spark_321cdh(), reason='Encryption is not supported before Spark 3.2.0 or Parquet < 1.12') @pytest.mark.skipif(os.environ.get('INCLUDE_PARQUET_HADOOP_TEST_JAR', 'false') == 'false', reason='INCLUDE_PARQUET_HADOOP_TEST_JAR is disabled') @pytest.mark.parametrize('v1_enabled_list', ["", "parquet"]) @@ -1463,13 +1577,16 @@ def test_parquet_read_encryption(spark_tmp_path, reader_confs, v1_enabled_list): assert_spark_exception( lambda: with_gpu_session( lambda spark: spark.read.parquet(data_path).collect()), - error_message='Could not read footer for file') + error_message='Could not read footer') # Common message fragment between all Spark versions. + # Note that this isn't thrown explicitly by the plugin. assert_spark_exception( lambda: with_gpu_session( lambda spark: spark.read.parquet(data_path).collect(), conf=conf), error_message='The GPU does not support reading encrypted Parquet files') + +@disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114 def test_parquet_read_count(spark_tmp_path): parquet_gens = [int_gen, string_gen, double_gen] gen_list = [('_c' + str(i), gen) for i, gen in enumerate(parquet_gens)] diff --git a/integration_tests/src/main/python/parquet_write_test.py b/integration_tests/src/main/python/parquet_write_test.py index 805a0b8137c..775b4a9d1cb 100644 --- a/integration_tests/src/main/python/parquet_write_test.py +++ b/integration_tests/src/main/python/parquet_write_test.py @@ -29,6 +29,11 @@ pytestmark = pytest.mark.nightly_resource_consuming_test +conf_key_parquet_datetimeRebaseModeInWrite = 'spark.sql.parquet.datetimeRebaseModeInWrite' +conf_key_parquet_int96RebaseModeInWrite = 'spark.sql.parquet.int96RebaseModeInWrite' +conf_key_parquet_datetimeRebaseModeInRead = 'spark.sql.parquet.datetimeRebaseModeInRead' +conf_key_parquet_int96RebaseModeInRead = 'spark.sql.parquet.int96RebaseModeInRead' + # test with original parquet file reader, the multi-file parallel reader for cloud, and coalesce file reader for # non-cloud original_parquet_file_reader_conf={'spark.rapids.sql.format.parquet.reader.type': 'PERFILE'} @@ -37,8 +42,8 @@ reader_opt_confs = [original_parquet_file_reader_conf, multithreaded_parquet_file_reader_conf, coalesce_parquet_file_reader_conf] parquet_decimal_struct_gen= StructGen([['child'+str(ind), sub_gen] for ind, sub_gen in enumerate(decimal_gens)]) -writer_confs={'spark.sql.legacy.parquet.datetimeRebaseModeInWrite': 'CORRECTED', - 'spark.sql.legacy.parquet.int96RebaseModeInWrite': 'CORRECTED'} +writer_confs={conf_key_parquet_datetimeRebaseModeInWrite: 'CORRECTED', + conf_key_parquet_int96RebaseModeInWrite: 'CORRECTED'} parquet_basic_gen =[byte_gen, short_gen, int_gen, long_gen, float_gen, double_gen, string_gen, boolean_gen, date_gen, TimestampGen(), binary_gen] @@ -158,8 +163,8 @@ def test_write_ts_millis(spark_tmp_path, ts_type, ts_rebase): lambda spark, path: unary_op_df(spark, gen).write.parquet(path), lambda spark, path: spark.read.parquet(path), data_path, - conf={'spark.sql.legacy.parquet.datetimeRebaseModeInWrite': ts_rebase, - 'spark.sql.legacy.parquet.int96RebaseModeInWrite': ts_rebase, 
+ conf={conf_key_parquet_datetimeRebaseModeInWrite: ts_rebase, + conf_key_parquet_int96RebaseModeInWrite: ts_rebase, 'spark.sql.parquet.outputTimestampType': ts_type}) @@ -285,8 +290,8 @@ def test_write_sql_save_table(spark_tmp_path, parquet_gens, spark_tmp_table_fact def writeParquetUpgradeCatchException(spark, df, data_path, spark_tmp_table_factory, int96_rebase, datetime_rebase, ts_write): spark.conf.set('spark.sql.parquet.outputTimestampType', ts_write) - spark.conf.set('spark.sql.legacy.parquet.datetimeRebaseModeInWrite', datetime_rebase) - spark.conf.set('spark.sql.legacy.parquet.int96RebaseModeInWrite', int96_rebase) # for spark 310 + spark.conf.set(conf_key_parquet_datetimeRebaseModeInWrite, datetime_rebase) + spark.conf.set(conf_key_parquet_int96RebaseModeInWrite, int96_rebase) # for spark 310 with pytest.raises(Exception) as e_info: df.coalesce(1).write.format("parquet").mode('overwrite').option("path", data_path).saveAsTable(spark_tmp_table_factory.get()) assert e_info.match(r".*SparkUpgradeException.*") @@ -544,8 +549,8 @@ def generate_map_with_empty_validity(spark, path): def test_parquet_write_fails_legacy_datetime(spark_tmp_path, data_gen, ts_write, ts_rebase_write): data_path = spark_tmp_path + '/PARQUET_DATA' all_confs = {'spark.sql.parquet.outputTimestampType': ts_write, - 'spark.sql.legacy.parquet.datetimeRebaseModeInWrite': ts_rebase_write, - 'spark.sql.legacy.parquet.int96RebaseModeInWrite': ts_rebase_write} + conf_key_parquet_datetimeRebaseModeInWrite: ts_rebase_write, + conf_key_parquet_int96RebaseModeInWrite: ts_rebase_write} def writeParquetCatchException(spark, data_gen, data_path): with pytest.raises(Exception) as e_info: unary_op_df(spark, data_gen).coalesce(1).write.parquet(data_path) @@ -563,12 +568,12 @@ def test_parquet_write_roundtrip_datetime_with_legacy_rebase(spark_tmp_path, dat ts_rebase_write, ts_rebase_read): data_path = spark_tmp_path + '/PARQUET_DATA' all_confs = {'spark.sql.parquet.outputTimestampType': ts_write, - 'spark.sql.legacy.parquet.datetimeRebaseModeInWrite': ts_rebase_write[0], - 'spark.sql.legacy.parquet.int96RebaseModeInWrite': ts_rebase_write[1], + conf_key_parquet_datetimeRebaseModeInWrite: ts_rebase_write[0], + conf_key_parquet_int96RebaseModeInWrite: ts_rebase_write[1], # The rebase modes in read configs should be ignored and overridden by the same # modes in write configs, which are retrieved from the written files. 
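                 # For illustration (hedged, mirroring the comment above): even if
                 # ts_rebase_write is ('LEGACY', 'LEGACY') and ts_rebase_read is
                 # ('CORRECTED', 'CORRECTED'), the round trip below should still match
                 # the CPU, because the modes recorded at write time take precedence
                 # when the files are read back.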
- 'spark.sql.legacy.parquet.datetimeRebaseModeInRead': ts_rebase_read[0], - 'spark.sql.legacy.parquet.int96RebaseModeInRead': ts_rebase_read[1]} + conf_key_parquet_datetimeRebaseModeInRead: ts_rebase_read[0], + conf_key_parquet_int96RebaseModeInRead: ts_rebase_read[1]} assert_gpu_and_cpu_writes_are_equal_collect( lambda spark, path: unary_op_df(spark, data_gen).coalesce(1).write.parquet(path), lambda spark, path: spark.read.parquet(path), @@ -597,7 +602,8 @@ def test_it(spark): spark.sql("CREATE TABLE {} LOCATION '{}/ctas' AS SELECT * FROM {}".format( ctas_with_existing_name, data_path, src_name)) except pyspark.sql.utils.AnalysisException as e: - if allow_non_empty or e.desc.find('non-empty directory') == -1: + description = e._desc if (is_spark_400_or_later() or is_databricks_version_or_later(14, 3)) else e.desc + if allow_non_empty or description.find('non-empty directory') == -1: raise e with_gpu_session(test_it, conf) @@ -825,8 +831,8 @@ def write_partitions(spark, table_path): ) def hive_timestamp_value(spark_tmp_table_factory, spark_tmp_path, ts_rebase, func): - conf={'spark.sql.legacy.parquet.datetimeRebaseModeInWrite': ts_rebase, - 'spark.sql.legacy.parquet.int96RebaseModeInWrite': ts_rebase} + conf={conf_key_parquet_datetimeRebaseModeInWrite: ts_rebase, + conf_key_parquet_int96RebaseModeInWrite: ts_rebase} def create_table(spark, path): tmp_table = spark_tmp_table_factory.get() diff --git a/integration_tests/src/main/python/repart_test.py b/integration_tests/src/main/python/repart_test.py index 7f299373ff6..19759b77f5d 100644 --- a/integration_tests/src/main/python/repart_test.py +++ b/integration_tests/src/main/python/repart_test.py @@ -57,6 +57,8 @@ struct_of_maps = StructGen([['child0', BooleanGen()]] + [ ['child%d' % (i + 1), gen] for i, gen in enumerate(map_gens)]) +kudo_enabled_conf_key = "spark.rapids.shuffle.kudo.serializer.enabled" + @pytest.mark.parametrize('data_gen', [pytest.param((StructGen([['child0', DecimalGen(7, 2)]]), StructGen([['child1', IntegerGen()]]))), # left_struct(child0 = 4 level nested struct, child1 = Int) @@ -78,11 +80,13 @@ StructGen([['child1', MapGen(BooleanGen(nullable=False), boolean_gen)]], nullable=False))], ids=idfn) # This tests the union of DF of structs with different types of cols as long as the struct itself # isn't null. This is a limitation in cudf because we don't support nested types as literals -def test_union_struct_missing_children(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_union_struct_missing_children(data_gen, kudo_enabled): left_gen, right_gen = data_gen assert_gpu_and_cpu_are_equal_collect( lambda spark : binary_op_df(spark, left_gen).unionByName(binary_op_df( - spark, right_gen), True)) + spark, right_gen), True), + conf = {kudo_enabled_conf_key: kudo_enabled}) @pytest.mark.parametrize('data_gen', all_gen + map_gens + array_gens_sample + [all_basic_struct_gen, @@ -90,9 +94,11 @@ def test_union_struct_missing_children(data_gen): nested_struct, struct_of_maps], ids=idfn) # This tests union of two DFs of two cols each. 
The types of the left col and right col is the same -def test_union(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_union(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( - lambda spark : binary_op_df(spark, data_gen).union(binary_op_df(spark, data_gen))) + lambda spark : binary_op_df(spark, data_gen).union(binary_op_df(spark, data_gen)), + conf = {kudo_enabled_conf_key: kudo_enabled}) @pytest.mark.parametrize('data_gen', all_gen + map_gens + array_gens_sample + [all_basic_struct_gen, @@ -100,9 +106,11 @@ def test_union(data_gen): nested_struct, struct_of_maps], ids=idfn) # This tests union of two DFs of two cols each. The types of the left col and right col is the same -def test_unionAll(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_unionAll(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( - lambda spark : binary_op_df(spark, data_gen).unionAll(binary_op_df(spark, data_gen))) + lambda spark : binary_op_df(spark, data_gen).unionAll(binary_op_df(spark, data_gen)), + conf = {kudo_enabled_conf_key: kudo_enabled}) @pytest.mark.parametrize('data_gen', all_gen + map_gens + array_gens_sample + [all_basic_struct_gen, @@ -114,10 +122,13 @@ def test_unionAll(data_gen): struct_of_maps], ids=idfn) # This tests the union of two DFs of structs with missing child column names. The missing child # column will be replaced by nulls in the output DF. This is a feature added in 3.1+ -def test_union_by_missing_col_name(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_union_by_missing_col_name(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( lambda spark : binary_op_df(spark, data_gen).withColumnRenamed("a", "x") - .unionByName(binary_op_df(spark, data_gen).withColumnRenamed("a", "y"), True)) + .unionByName(binary_op_df(spark, data_gen).withColumnRenamed("a", + "y"), True), + conf = {kudo_enabled_conf_key: kudo_enabled}) # the first number ('1' and '2') is the nest level @@ -133,7 +144,8 @@ def test_union_by_missing_col_name(data_gen): nest_1_one, nest_1_two, nest_2_one, nest_2_two]) @pytest.mark.skipif(is_before_spark_330(), reason="This is supported only in Spark 3.3.0+") -def test_union_by_missing_field_name_in_arrays_structs(gen_pair): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_union_by_missing_field_name_in_arrays_structs(gen_pair, kudo_enabled): """ This tests the union of two DFs of arrays of structs with missing field names. The missing field will be replaced be nulls in the output DF. 
This is a feature added in 3.3+ @@ -142,8 +154,8 @@ def test_union_by_missing_field_name_in_arrays_structs(gen_pair): """ def assert_union_equal(gen1, gen2): assert_gpu_and_cpu_are_equal_collect( - lambda spark: unary_op_df(spark, gen1).unionByName(unary_op_df(spark, gen2), True) - ) + lambda spark: unary_op_df(spark, gen1).unionByName(unary_op_df(spark, gen2), True), + conf = {kudo_enabled_conf_key: kudo_enabled}) assert_union_equal(gen_pair[0], gen_pair[1]) assert_union_equal(gen_pair[1], gen_pair[0]) @@ -155,9 +167,12 @@ def assert_union_equal(gen1, gen2): StructGen([['child0', DecimalGen(7, 2)]]), nested_struct, struct_of_maps], ids=idfn) -def test_union_by_name(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_union_by_name(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( - lambda spark : binary_op_df(spark, data_gen).unionByName(binary_op_df(spark, data_gen))) + lambda spark : binary_op_df(spark, data_gen).unionByName(binary_op_df(spark, + data_gen)), + conf = {kudo_enabled_conf_key: kudo_enabled}) @pytest.mark.parametrize('data_gen', [ @@ -166,19 +181,23 @@ def test_union_by_name(data_gen): pytest.param([('array' + str(i), gen) for i, gen in enumerate(array_gens_sample + [ArrayGen(BinaryGen(max_length=5), max_length=5)])]), pytest.param([('map' + str(i), gen) for i, gen in enumerate(map_gens_sample)]), ], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_coalesce_types(data_gen): +def test_coalesce_types(data_gen, kudo_enabled): assert_gpu_and_cpu_are_equal_collect( - lambda spark: gen_df(spark, data_gen).coalesce(2)) + lambda spark: gen_df(spark, data_gen).coalesce(2), + conf = {kudo_enabled_conf_key: kudo_enabled}) @pytest.mark.parametrize('num_parts', [1, 10, 100, 1000, 2000], ids=idfn) @pytest.mark.parametrize('length', [0, 2048, 4096], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_coalesce_df(num_parts, length): +def test_coalesce_df(num_parts, length, kudo_enabled): #This should change eventually to be more than just the basic gens gen_list = [('_c' + str(i), gen) for i, gen in enumerate(all_basic_gens + decimal_gens + [binary_gen])] assert_gpu_and_cpu_are_equal_collect( - lambda spark : gen_df(spark, gen_list, length=length).coalesce(num_parts)) + lambda spark : gen_df(spark, gen_list, length=length).coalesce(num_parts), + conf = {kudo_enabled_conf_key: kudo_enabled}) @pytest.mark.parametrize('data_gen', [ pytest.param([('_c' + str(i), gen) for i, gen in enumerate(all_basic_gens + decimal_gens + [binary_gen])]), @@ -188,15 +207,17 @@ def test_coalesce_df(num_parts, length): ], ids=idfn) @pytest.mark.parametrize('num_parts', [1, 10, 2345], ids=idfn) @pytest.mark.parametrize('length', [0, 2048, 4096], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @ignore_order(local=True) # To avoid extra data shuffle by 'sort on Spark' for this repartition test. 
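# Hedged note on the kudo parametrization used throughout this file: each test simply
# layers the serializer toggle onto whatever conf it already needed, e.g.
#   conf = copy_and_update(base_conf, {kudo_enabled_conf_key: kudo_enabled})
# (base_conf is a stand-in for the test's existing settings; the simplest tests just
# pass {kudo_enabled_conf_key: kudo_enabled} directly.)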
@allow_non_gpu(*non_utc_allow) -def test_repartition_df(data_gen, num_parts, length): +def test_repartition_df(data_gen, num_parts, length, kudo_enabled): from pyspark.sql.functions import lit assert_gpu_and_cpu_are_equal_collect( # Add a computed column to avoid shuffle being optimized back to a CPU shuffle lambda spark : gen_df(spark, data_gen, length=length).withColumn('x', lit(1)).repartition(num_parts), # disable sort before shuffle so round robin works for maps - conf = {'spark.sql.execution.sortBeforeRepartition': 'false'}) + conf = {'spark.sql.execution.sortBeforeRepartition': 'false', + kudo_enabled_conf_key: kudo_enabled}) @pytest.mark.parametrize('data_gen', [ pytest.param([('_c' + str(i), gen) for i, gen in enumerate(all_basic_gens + decimal_gens)]), @@ -205,45 +226,53 @@ def test_repartition_df(data_gen, num_parts, length): ], ids=idfn) @pytest.mark.parametrize('num_parts', [1, 10, 2345], ids=idfn) @pytest.mark.parametrize('length', [0, 2048, 4096], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @ignore_order(local=True) # To avoid extra data shuffle by 'sort on Spark' for this repartition test. @allow_non_gpu(*non_utc_allow) -def test_repartition_df_for_round_robin(data_gen, num_parts, length): +def test_repartition_df_for_round_robin(data_gen, num_parts, length, kudo_enabled): from pyspark.sql.functions import lit assert_gpu_and_cpu_are_equal_collect( # Add a computed column to avoid shuffle being optimized back to a CPU shuffle lambda spark : gen_df(spark, data_gen, length=length).withColumn('x', lit(1)).repartition(num_parts), # Enable sort for round robin partition - conf = {'spark.sql.execution.sortBeforeRepartition': 'true'}) # default is true + conf = {'spark.sql.execution.sortBeforeRepartition': 'true', + kudo_enabled_conf_key: kudo_enabled}) # default is true @allow_non_gpu('ShuffleExchangeExec', 'RoundRobinPartitioning') @pytest.mark.parametrize('data_gen', [[('a', simple_string_to_string_map_gen)]], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @ignore_order(local=True) # To avoid extra data shuffle by 'sort on Spark' for this repartition test. -def test_round_robin_sort_fallback(data_gen): +def test_round_robin_sort_fallback(data_gen, kudo_enabled): from pyspark.sql.functions import lit assert_gpu_fallback_collect( # Add a computed column to avoid shuffle being optimized back to a CPU shuffle like in test_repartition_df lambda spark : gen_df(spark, data_gen).withColumn('extra', lit(1)).repartition(13), - 'ShuffleExchangeExec') + 'ShuffleExchangeExec', + conf = {kudo_enabled_conf_key: kudo_enabled}) @allow_non_gpu("ProjectExec", "ShuffleExchangeExec") @ignore_order(local=True) # To avoid extra data shuffle by 'sort on Spark' for this repartition test. 
@pytest.mark.parametrize('num_parts', [2, 10, 17, 19, 32], ids=idfn) @pytest.mark.parametrize('gen', [([('ag', ArrayGen(StructGen([('b1', long_gen)])))], ['ag'])], ids=idfn) -def test_hash_repartition_exact_fallback(gen, num_parts): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_repartition_exact_fallback(gen, num_parts, kudo_enabled): data_gen = gen[0] part_on = gen[1] assert_gpu_fallback_collect( lambda spark : gen_df(spark, data_gen, length=1024) \ .repartition(num_parts, *part_on) \ .withColumn('id', f.spark_partition_id()) \ - .selectExpr('*'), "ShuffleExchangeExec") + .selectExpr('*'), "ShuffleExchangeExec", + conf = {kudo_enabled_conf_key: kudo_enabled}) @allow_non_gpu("ProjectExec") @pytest.mark.parametrize('data_gen', [ArrayGen(StructGen([('b1', long_gen)]))], ids=idfn) -def test_hash_fallback(data_gen): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_hash_fallback(data_gen, kudo_enabled): assert_gpu_fallback_collect( lambda spark : unary_op_df(spark, data_gen, length=1024) \ - .selectExpr('*', 'hash(a) as h'), "ProjectExec") + .selectExpr('*', 'hash(a) as h'), "ProjectExec", + conf = {kudo_enabled_conf_key: kudo_enabled}) @ignore_order(local=True) # To avoid extra data shuffle by 'sort on Spark' for this repartition test. @pytest.mark.parametrize('num_parts', [1, 2, 10, 17, 19, 32], ids=idfn) @@ -279,8 +308,9 @@ def test_hash_fallback(data_gen): ([('a', decimal_gen_64bit), ('b', decimal_gen_64bit), ('c', decimal_gen_64bit)], ['a', 'b', 'c']), ([('a', decimal_gen_128bit), ('b', decimal_gen_128bit), ('c', decimal_gen_128bit)], ['a', 'b', 'c']), ], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_hash_repartition_exact(gen, num_parts): +def test_hash_repartition_exact(gen, num_parts, kudo_enabled): data_gen = gen[0] part_on = gen[1] assert_gpu_and_cpu_are_equal_collect( @@ -288,7 +318,8 @@ def test_hash_repartition_exact(gen, num_parts): .repartition(num_parts, *part_on)\ .withColumn('id', f.spark_partition_id())\ .withColumn('hashed', f.hash(*part_on))\ - .selectExpr('*', 'pmod(hashed, {})'.format(num_parts))) + .selectExpr('*', 'pmod(hashed, {})'.format(num_parts)), + conf = {kudo_enabled_conf_key: kudo_enabled}) @ignore_order(local=True) # To avoid extra data shuffle by 'sort on Spark' for this repartition test. @@ -311,18 +342,19 @@ def test_hash_repartition_exact_longs_no_overflow(num_parts, is_ansi_mode): @ignore_order(local=True) # To avoid extra data shuffle by 'sort on Spark' for this repartition test. 
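# Illustrative arithmetic for the rewritten overflow test below (not a new assertion):
# overflowVal = (1 << 63) - maxVal, so for the row holding maxVal the sum
# maxVal + overflowVal == 2**63, one past Long.MaxValue (2**63 - 1); e.g. with
# maxVal = 1000, overflowVal = 9223372036854774808 and the addition must overflow
# under ANSI mode, which is exactly the error the test expects.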
@pytest.mark.parametrize('num_parts', [17], ids=idfn) +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) @allow_non_gpu(*non_utc_allow) -def test_hash_repartition_long_overflow_ansi_exception(num_parts): - data_gen = [('a', long_gen)] - part_on = [f.col('a') + 15] - conf = ansi_enabled_conf +def test_hash_repartition_long_overflow_ansi_exception(num_parts, kudo_enabled): + conf = copy_and_update(ansi_enabled_conf, {kudo_enabled_conf_key: kudo_enabled}) def test_function(spark): - return gen_df(spark, data_gen, length=1024) \ - .withColumn('plus15', f.col('a') + 15) \ - .repartition(num_parts, f.col('plus15')) \ + df = gen_df(spark, [('a', long_gen)], length=1024) + maxVal = df.selectExpr("max(a) as m").head()['m'] + overflowVal = (1 << 63) - maxVal + return df.withColumn('plus', f.col('a') + overflowVal) \ + .repartition(num_parts, f.col('plus')) \ .withColumn('id', f.spark_partition_id()) \ - .withColumn('hashed', f.hash(*part_on)) \ + .withColumn('hashed', f.hash(f.col('a') + overflowVal)) \ .selectExpr('*', 'pmod(hashed, {})'.format(num_parts)) assert_gpu_and_cpu_error( @@ -332,11 +364,13 @@ def test_function(spark): # Test a query that should cause Spark to leverage getShuffleRDD @ignore_order(local=True) -def test_union_with_filter(): +@pytest.mark.parametrize("kudo_enabled", ["true", "false"], ids=idfn) +def test_union_with_filter(kudo_enabled): def doit(spark): dfa = spark.range(1, 100).withColumn("id2", f.col("id")) dfb = dfa.groupBy("id").agg(f.size(f.collect_set("id2")).alias("idc")) dfc = dfb.filter(f.col("idc") == 1).select("id") return dfc.union(dfc) - conf = { "spark.sql.adaptive.enabled": "true" } + conf = { "spark.sql.adaptive.enabled": "true", + kudo_enabled_conf_key: kudo_enabled} assert_gpu_and_cpu_are_equal_collect(doit, conf) diff --git a/integration_tests/src/main/python/schema_evolution_test.py b/integration_tests/src/main/python/schema_evolution_test.py index ff501324cc0..57af4a1126e 100644 --- a/integration_tests/src/main/python/schema_evolution_test.py +++ b/integration_tests/src/main/python/schema_evolution_test.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -34,7 +34,9 @@ # List of additional column data generators to use when adding columns _additional_gens = [ - boolean_gen, + # Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and + # https://github.com/rapidsai/cudf/issues/6763 . + # Once the first issue is fixed, add back boolean_gen byte_gen, short_gen, int_gen, @@ -49,7 +51,10 @@ # simple_string_to_string_map_gen), ArrayGen(_custom_date_gen), struct_gen_decimal128, - StructGen([("c0", ArrayGen(long_gen)), ("c1", boolean_gen)]), + # Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and + # https://github.com/rapidsai/cudf/issues/6763 . 
+ # Once the first issue is fixed, switch c1 back from int_gen to boolean_gen + StructGen([("c0", ArrayGen(long_gen)), ("c1", int_gen)]), ] def get_additional_columns(): diff --git a/integration_tests/src/main/python/string_test.py b/integration_tests/src/main/python/string_test.py index 7099436f608..03c022bb201 100644 --- a/integration_tests/src/main/python/string_test.py +++ b/integration_tests/src/main/python/string_test.py @@ -23,7 +23,7 @@ from pyspark.sql.types import * import pyspark.sql.utils import pyspark.sql.functions as f -from spark_session import with_cpu_session, with_gpu_session, is_databricks104_or_later, is_before_spark_320, is_before_spark_400 +from spark_session import with_cpu_session, with_gpu_session, is_databricks104_or_later, is_databricks_version_or_later, is_before_spark_320, is_spark_400_or_later _regexp_conf = { 'spark.rapids.sql.regexp.enabled': 'true' } @@ -104,10 +104,6 @@ def test_substring_index(data_gen,delim): @allow_non_gpu('ProjectExec') -@pytest.mark.skipif(condition=not is_before_spark_400(), - reason="Bug in Apache Spark 4.0 causes NumberFormatExceptions from substring_index(), " - "if called with index==null. For further information, see: " - "https://issues.apache.org/jira/browse/SPARK-48989.") @pytest.mark.parametrize('data_gen', [mk_str_gen('([ABC]{0,3}_?){0,7}')], ids=idfn) def test_unsupported_fallback_substring_index(data_gen): delim_gen = StringGen(pattern="_") @@ -327,6 +323,10 @@ def test_rtrim(data_gen): 'TRIM(TRAILING NULL FROM a)', 'TRIM(TRAILING "" FROM a)')) +@pytest.mark.skipif(condition=is_spark_400_or_later() or is_databricks_version_or_later(14, 3), + reason="startsWith(None)/endswith(None) seems to cause an NPE in Column.fn() on Apache Spark 4.0, " + "and Databricks 14.3. " + "See https://issues.apache.org/jira/browse/SPARK-48995.") def test_startswith(): gen = mk_str_gen('[Ab\ud720]{3}A.{0,3}Z[Ab\ud720]{3}') assert_gpu_and_cpu_are_equal_collect( @@ -351,8 +351,9 @@ def assert_gpu_did_fallback(op): assert_gpu_did_fallback(f.col("a").startswith(f.col("a"))) -@pytest.mark.skipif(condition=not is_before_spark_400(), - reason="endswith(None) seems to cause an NPE in Column.fn() on Apache Spark 4.0. " +@pytest.mark.skipif(condition=is_spark_400_or_later() or is_databricks_version_or_later(14, 3), + reason="startsWith(None)/endswith(None) seems to cause an NPE in Column.fn() on Apache Spark 4.0, " + "and Databricks 14.3. "
"See https://issues.apache.org/jira/browse/SPARK-48995.") def test_endswith(): gen = mk_str_gen('[Ab\ud720]{3}A.{0,3}Z[Ab\ud720]{3}') diff --git a/integration_tests/src/main/python/url_test.py b/integration_tests/src/main/python/url_test.py index 9d601c72675..e1bf9c821a8 100644 --- a/integration_tests/src/main/python/url_test.py +++ b/integration_tests/src/main/python/url_test.py @@ -148,7 +148,7 @@ url_gen = StringGen(url_pattern) -supported_parts = ['PROTOCOL', 'HOST', 'QUERY', 'PATH'] +supported_parts = ['PROTOCOL', 'HOST', 'QUERY', 'PATH', 'invalid', 'path'] unsupported_parts = ['REF', 'FILE', 'AUTHORITY', 'USERINFO'] @pytest.mark.parametrize('data_gen', [url_gen, edge_cases_gen], ids=idfn) diff --git a/integration_tests/src/test/resources/int_struct_formatted.json b/integration_tests/src/test/resources/int_struct_formatted.json index e3ac75fbf14..34e9557b461 100644 --- a/integration_tests/src/test/resources/int_struct_formatted.json +++ b/integration_tests/src/test/resources/int_struct_formatted.json @@ -2,4 +2,3 @@ {"data": {"A": 1}} {"data": {"B": 50}} {"data": {"B": -128, "A": 127}} -{"data": {"B": 99999999999999999999, "A": -9999999999999999999}} diff --git a/integration_tests/src/test/resources/int_struct_formatted_problematic_rows.json b/integration_tests/src/test/resources/int_struct_formatted_problematic_rows.json new file mode 100644 index 00000000000..c51e69b5297 --- /dev/null +++ b/integration_tests/src/test/resources/int_struct_formatted_problematic_rows.json @@ -0,0 +1,2 @@ +{"data": {"B": 99999999999999999999, "A": -9999999999999999999}} +{"data": {"A": 0, "B": "0"}} diff --git a/integration_tests/src/test/scala/com/nvidia/spark/rapids/functionsSuite.scala b/integration_tests/src/test/scala/com/nvidia/spark/rapids/functionsSuite.scala new file mode 100644 index 00000000000..04e2f8a48f1 --- /dev/null +++ b/integration_tests/src/test/scala/com/nvidia/spark/rapids/functionsSuite.scala @@ -0,0 +1,443 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.nvidia.spark.rapids + +import com.nvidia.spark.functions._ + +import org.apache.spark.sql.{Column, Row} +import org.apache.spark.sql.api.java._ +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.nvidia.SparkTestBase +import org.apache.spark.sql.types._ + +class functionsSuite extends SparkTestBase { + test("basic 0 arg df_udf") { + val zero = df_udf(() => lit(0)) + withSparkSession{ spark => + spark.udf.register("zero", zero) + assertSame(Array( + Row(0L, 0), + Row(1L, 0)), + spark.range(2).selectExpr("id", "zero()").collect()) + assertSame(Array( + Row(0L, 0), + Row(1L, 0)), + spark.range(2).select(col("id"), zero()).collect()) + } + } + + test("basic 1 arg df_udf") { + val inc = df_udf((input: Column) => input + 1) + withSparkSession { spark => + spark.udf.register("inc", inc) + assertSame(Array( + Row(0L, 1L), + Row(1L, 2L)), + spark.range(2).selectExpr("id", "inc(id)").collect()) + assertSame(Array( + Row(0L, 1L), + Row(1L, 2L)), + spark.range(2).select(col("id"), inc(col("id"))).collect()) + } + } + + + test("basic 2 arg df_udf") { + val add = df_udf((a: Column, b:Column) => a + b) + withSparkSession { spark => + spark.udf.register("add", add) + assertSame(Array( + Row(0L, 0L), + Row(1L, 2L)), + spark.range(2).selectExpr("id", "add(id, id)").collect()) + assertSame(Array( + Row(0L, 0L), + Row(1L, 2L)), + spark.range(2).select(col("id"), add(col("id"), col("id"))).collect()) + } + } + + test("basic 3 arg df_udf") { + val add = df_udf((a: Column, b:Column, c:Column) => a + b + c) + withSparkSession { spark => + spark.udf.register("add", add) + assertSame(Array( + Row(0L, 0L), + Row(1L, 3L)), + spark.range(2).selectExpr("id", "add(id, id, id)").collect()) + assertSame(Array( + Row(0L, 0L), + Row(1L, 3L)), + spark.range(2).select(col("id"), add(col("id"), col("id"), col("id"))).collect()) + } + } + + test("basic 4 arg df_udf") { + val add = df_udf((a: Column, b:Column, c:Column, d:Column) => a + b + c + d) + withSparkSession { spark => + spark.udf.register("add", add) + assertSame(Array( + Row(0L, 1L), + Row(1L, 4L)), + spark.range(2).selectExpr("id", "add(id, id, 1, id)").collect()) + assertSame(Array( + Row(0L, 1L), + Row(1L, 4L)), + spark.range(2).select(col("id"), add(col("id"), col("id"), lit(1), col("id"))).collect()) + } + } + + test("basic 5 arg df_udf") { + val add = df_udf((a: Column, b:Column, c:Column, d:Column, e:Column) => + a + b + c + d + e) + withSparkSession { spark => + spark.udf.register("add", add) + assertSame(Array( + Row(0L, 2L), + Row(1L, 5L)), + spark.range(2).selectExpr("id", "add(id, id, 1, id, 1)").collect()) + assertSame(Array( + Row(0L, 2L), + Row(1L, 5L)), + spark.range(2).select(col("id"), add(col("id"), col("id"), lit(1), + col("id"), lit(1))).collect()) + } + } + + test("basic 6 arg df_udf") { + val add = df_udf((a: Column, b:Column, c:Column, d:Column, e:Column, f:Column) => + a + b + c + d + e + f) + withSparkSession { spark => + spark.udf.register("add", add) + assertSame(Array( + Row(0L, 2L), + Row(1L, 6L)), + spark.range(2).selectExpr("id", "add(id, id, 1, id, 1, id)").collect()) + assertSame(Array( + Row(0L, 2L), + Row(1L, 6L)), + spark.range(2).select(col("id"), add(col("id"), col("id"), lit(1), + col("id"), lit(1), col("id"))).collect()) + } + } + + test("basic 7 arg df_udf") { + val add = df_udf((a: Column, b:Column, c:Column, d:Column, e:Column, + f:Column, g:Column) => a + b + c + d + e + f + g) + withSparkSession { spark => + spark.udf.register("add", add) + assertSame(Array( + Row(0L, 2L), + Row(1L, 
7L)), + spark.range(2).selectExpr("id", "add(id, id, 1, id, 1, id, id)").collect()) + assertSame(Array( + Row(0L, 2L), + Row(1L, 7L)), + spark.range(2).select(col("id"), add(col("id"), col("id"), lit(1), + col("id"), lit(1), col("id"), col("id"))).collect()) + } + } + + test("basic 8 arg df_udf") { + val add = df_udf((a: Column, b:Column, c:Column, d:Column, e:Column, + f:Column, g:Column, h:Column) => a + b + c + d + e + f + g + h) + withSparkSession { spark => + spark.udf.register("add", add) + assertSame(Array( + Row(0L, 4L), + Row(1L, 9L)), + spark.range(2).selectExpr("id", "add(id, id, 1, id, 1, id, id, 2)").collect()) + assertSame(Array( + Row(0L, 4L), + Row(1L, 9L)), + spark.range(2).select(col("id"), add(col("id"), col("id"), lit(1), + col("id"), lit(1), col("id"), col("id"), lit(2))).collect()) + } + } + + test("basic 9 arg df_udf") { + val add = df_udf((a: Column, b:Column, c:Column, d:Column, e:Column, + f:Column, g:Column, h:Column, i:Column) => + a + b + c + d + e + f + g + h + i) + withSparkSession { spark => + spark.udf.register("add", add) + assertSame(Array( + Row(0L, 4L), + Row(1L, 10L)), + spark.range(2).selectExpr("id", "add(id, id, 1, id, 1, id, id, 2, id)").collect()) + assertSame(Array( + Row(0L, 4L), + Row(1L, 10L)), + spark.range(2).select(col("id"), add(col("id"), col("id"), lit(1), + col("id"), lit(1), col("id"), col("id"), lit(2), col("id"))).collect()) + } + } + + test("basic 10 arg df_udf") { + val add = df_udf((a: Column, b:Column, c:Column, d:Column, e:Column, + f:Column, g:Column, h:Column, i:Column, j:Column) => + a + b + c + d + e + f + g + h + i + j) + withSparkSession { spark => + spark.udf.register("add", add) + assertSame(Array( + Row(0L, 4L), + Row(1L, 11L)), + spark.range(2).selectExpr("id", "add(id, id, 1, id, 1, id, id, 2, id, id)").collect()) + assertSame(Array( + Row(0L, 4L), + Row(1L, 11L)), + spark.range(2).select(col("id"), add(col("id"), col("id"), lit(1), + col("id"), lit(1), col("id"), col("id"), lit(2), col("id"), col("id"))).collect()) + } + } + + test("nested df_udf") { + val add = df_udf((a: Column, b:Column) => a + b) + withSparkSession { spark => + spark.udf.register("add", add) + assertSame(Array( + Row(0L, 22L), + Row(1L, 25L)), + spark.range(2).selectExpr("id", "add(add(id, 12), add(add(id, id), 10))").collect()) + } + } + + test("complex df_udf") { + val extractor = df_udf((json: Column) => { + val schema = StructType(Seq(StructField("values", ArrayType(LongType)))) + val extracted_json = from_json(json, schema, Map.empty[String, String]) + aggregate(extracted_json("values"), + lit(0L), + (a, b) => coalesce(a, lit(0L)) + coalesce(b, lit(0L)), + a => a) + }) + withSparkSession { spark => + import spark.implicits._ + spark.udf.register("extractor", extractor) + assertSame(Array( + Row(6L), + Row(3L)), + Seq("""{"values":[1,2,3]}""", + """{"values":[1, null, null, 2]}""").toDF("json").selectExpr("extractor(json)").collect()) + } + } + + test("j basic 0 arg df_udf") { + val zero = df_udf(new UDF0[Column] { + override def call(): Column = lit(0) + }) + withSparkSession{ spark => + spark.udf.register("zero", zero) + assertSame(Array( + Row(0L, 0), + Row(1L, 0)), + spark.range(2).selectExpr("id", "zero()").collect()) + assertSame(Array( + Row(0L, 0), + Row(1L, 0)), + spark.range(2).select(col("id"), zero()).collect()) + } + } + + test("jbasic 1 arg df_udf") { + val inc = df_udf(new UDF1[Column, Column] { + override def call(a: Column): Column = a + 1 + }) + withSparkSession { spark => + spark.udf.register("inc", inc) + assertSame(Array( 
+ Row(0L, 1L), + Row(1L, 2L)), + spark.range(2).selectExpr("id", "inc(id)").collect()) + assertSame(Array( + Row(0L, 1L), + Row(1L, 2L)), + spark.range(2).select(col("id"), inc(col("id"))).collect()) + } + } + + test("jbasic 2 arg df_udf") { + val add = df_udf(new UDF2[Column, Column, Column] { + override def call(a: Column, b:Column): Column = a + b + }) + withSparkSession { spark => + spark.udf.register("add", add) + assertSame(Array( + Row(0L, 0L), + Row(1L, 2L)), + spark.range(2).selectExpr("id", "add(id, id)").collect()) + assertSame(Array( + Row(0L, 0L), + Row(1L, 2L)), + spark.range(2).select(col("id"), add(col("id"), col("id"))).collect()) + } + } + + test("jbasic 3 arg df_udf") { + val add = df_udf(new UDF3[Column, Column, Column, Column] { + override def call(a: Column, b: Column, c: Column): Column = a + b + c + }) + withSparkSession { spark => + spark.udf.register("add", add) + assertSame(Array( + Row(0L, 0L), + Row(1L, 3L)), + spark.range(2).selectExpr("id", "add(id, id, id)").collect()) + assertSame(Array( + Row(0L, 0L), + Row(1L, 3L)), + spark.range(2).select(col("id"), add(col("id"), col("id"), col("id"))).collect()) + } + } + + test("jbasic 4 arg df_udf") { + val add = df_udf(new UDF4[Column, Column, Column, Column, Column] { + override def call(a: Column, b:Column, c:Column, d:Column): Column = a + b + c + d + }) + withSparkSession { spark => + spark.udf.register("add", add) + assertSame(Array( + Row(0L, 1L), + Row(1L, 4L)), + spark.range(2).selectExpr("id", "add(id, id, 1, id)").collect()) + assertSame(Array( + Row(0L, 1L), + Row(1L, 4L)), + spark.range(2).select(col("id"), add(col("id"), col("id"), lit(1), col("id"))).collect()) + } + } + + test("jbasic 5 arg df_udf") { + val add = df_udf(new UDF5[Column, Column, Column, Column, Column, Column] { + override def call(a: Column, b: Column, c: Column, d: Column, e: Column): Column = + a + b + c + d + e + }) + withSparkSession { spark => + spark.udf.register("add", add) + assertSame(Array( + Row(0L, 2L), + Row(1L, 5L)), + spark.range(2).selectExpr("id", "add(id, id, 1, id, 1)").collect()) + assertSame(Array( + Row(0L, 2L), + Row(1L, 5L)), + spark.range(2).select(col("id"), add(col("id"), col("id"), lit(1), + col("id"), lit(1))).collect()) + } + } + + test("jbasic 6 arg df_udf") { + val add = df_udf(new UDF6[Column, Column, Column, Column, Column, Column, Column] { + override def call(a: Column, b:Column, c:Column, d:Column, e:Column, f:Column) = + a + b + c + d + e + f + }) + withSparkSession { spark => + spark.udf.register("add", add) + assertSame(Array( + Row(0L, 2L), + Row(1L, 6L)), + spark.range(2).selectExpr("id", "add(id, id, 1, id, 1, id)").collect()) + assertSame(Array( + Row(0L, 2L), + Row(1L, 6L)), + spark.range(2).select(col("id"), add(col("id"), col("id"), lit(1), + col("id"), lit(1), col("id"))).collect()) + } + } + + test("jbasic 7 arg df_udf") { + val add = df_udf(new UDF7[Column, Column, Column, Column, Column, Column, Column, + Column] { + override def call(a: Column, b:Column, c:Column, d:Column, e:Column, + f:Column, g:Column): Column = a + b + c + d + e + f + g + }) + withSparkSession { spark => + spark.udf.register("add", add) + assertSame(Array( + Row(0L, 2L), + Row(1L, 7L)), + spark.range(2).selectExpr("id", "add(id, id, 1, id, 1, id, id)").collect()) + assertSame(Array( + Row(0L, 2L), + Row(1L, 7L)), + spark.range(2).select(col("id"), add(col("id"), col("id"), lit(1), + col("id"), lit(1), col("id"), col("id"))).collect()) + } + } + + test("jbasic 8 arg df_udf") { + val add = df_udf(new UDF8[Column, 
Column, Column, Column, Column, Column, Column, + Column, Column] { + override def call(a: Column, b: Column, c: Column, d: Column, e: Column, + f: Column, g: Column, h: Column): Column = a + b + c + d + e + f + g + h + }) + withSparkSession { spark => + spark.udf.register("add", add) + assertSame(Array( + Row(0L, 4L), + Row(1L, 9L)), + spark.range(2).selectExpr("id", "add(id, id, 1, id, 1, id, id, 2)").collect()) + assertSame(Array( + Row(0L, 4L), + Row(1L, 9L)), + spark.range(2).select(col("id"), add(col("id"), col("id"), lit(1), + col("id"), lit(1), col("id"), col("id"), lit(2))).collect()) + } + } + + test("jbasic 9 arg df_udf") { + val add = df_udf(new UDF9[Column, Column, Column, Column, Column, Column, Column, + Column, Column, Column] { + override def call(a: Column, b:Column, c:Column, d:Column, e:Column, + f:Column, g:Column, h:Column, i:Column): Column = + a + b + c + d + e + f + g + h + i + }) + withSparkSession { spark => + spark.udf.register("add", add) + assertSame(Array( + Row(0L, 4L), + Row(1L, 10L)), + spark.range(2).selectExpr("id", "add(id, id, 1, id, 1, id, id, 2, id)").collect()) + assertSame(Array( + Row(0L, 4L), + Row(1L, 10L)), + spark.range(2).select(col("id"), add(col("id"), col("id"), lit(1), + col("id"), lit(1), col("id"), col("id"), lit(2), col("id"))).collect()) + } + } + + test("jbasic 10 arg df_udf") { + val add = df_udf(new UDF10[Column, Column, Column, Column, Column, Column, Column, + Column, Column, Column, Column] { + override def call(a: Column, b:Column, c:Column, d:Column, e:Column, + f:Column, g:Column, h:Column, i:Column, j:Column): Column = + a + b + c + d + e + f + g + h + i + j + }) + withSparkSession { spark => + spark.udf.register("add", add) + assertSame(Array( + Row(0L, 4L), + Row(1L, 11L)), + spark.range(2).selectExpr("id", "add(id, id, 1, id, 1, id, id, 2, id, id)").collect()) + assertSame(Array( + Row(0L, 4L), + Row(1L, 11L)), + spark.range(2).select(col("id"), add(col("id"), col("id"), lit(1), + col("id"), lit(1), col("id"), col("id"), lit(2), col("id"), col("id"))).collect()) + } + } +} \ No newline at end of file diff --git a/integration_tests/src/test/scala/org/apache/spark/sql/nvidia/SparkTestBase.scala b/integration_tests/src/test/scala/org/apache/spark/sql/nvidia/SparkTestBase.scala new file mode 100644 index 00000000000..2bd6697ffad --- /dev/null +++ b/integration_tests/src/test/scala/org/apache/spark/sql/nvidia/SparkTestBase.scala @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.nvidia + +import java.io.File +import java.nio.file.Files +import java.util.{Locale, TimeZone} + +import org.scalatest.BeforeAndAfterAll +import org.scalatest.funsuite.AnyFunSuite + +import org.apache.spark.SparkConf +import org.apache.spark.internal.Logging +import org.apache.spark.sql.{Row, SparkSession} + +object SparkSessionHolder extends Logging { + private var spark = createSparkSession() + private var origConf = spark.conf.getAll + private var origConfKeys = origConf.keys.toSet + + private def setAllConfs(confs: Array[(String, String)]): Unit = confs.foreach { + case (key, value) if spark.conf.get(key, null) != value => + spark.conf.set(key, value) + case _ => // No need to modify it + } + + private def createSparkSession(): SparkSession = { + SparkSession.cleanupAnyExistingSession() + + TimeZone.setDefault(TimeZone.getTimeZone("UTC")) + Locale.setDefault(Locale.US) + + val builder = SparkSession.builder() + .master("local[1]") + .config("spark.sql.extensions", "com.nvidia.spark.DFUDFPlugin") + .config("spark.sql.warehouse.dir", sparkWarehouseDir.getAbsolutePath) + .appName("dataframe udf tests") + + builder.getOrCreate() + } + + private def reinitSession(): Unit = { + spark = createSparkSession() + origConf = spark.conf.getAll + origConfKeys = origConf.keys.toSet + } + + def sparkSession: SparkSession = { + if (SparkSession.getActiveSession.isEmpty) { + reinitSession() + } + spark + } + + def resetSparkSessionConf(): Unit = { + if (SparkSession.getActiveSession.isEmpty) { + reinitSession() + } else { + setAllConfs(origConf.toArray) + val currentKeys = spark.conf.getAll.keys.toSet + val toRemove = currentKeys -- origConfKeys + if (toRemove.contains("spark.shuffle.manager")) { + // cannot unset the config so need to reinitialize + reinitSession() + } else { + toRemove.foreach(spark.conf.unset) + } + } + logDebug(s"RESET CONF TO: ${spark.conf.getAll}") + } + + def withSparkSession[U](conf: SparkConf, f: SparkSession => U): U = { + resetSparkSessionConf() + logDebug(s"SETTING CONF: ${conf.getAll.toMap}") + setAllConfs(conf.getAll) + logDebug(s"RUN WITH CONF: ${spark.conf.getAll}\n") + f(spark) + } + + private lazy val sparkWarehouseDir: File = { + new File(System.getProperty("java.io.tmpdir")).mkdirs() + val path = Files.createTempDirectory("spark-warehouse") + val file = new File(path.toString) + file.deleteOnExit() + file + } +} + +/** + * Base to be able to run tests with a spark context + */ +trait SparkTestBase extends AnyFunSuite with BeforeAndAfterAll { + def withSparkSession[U](f: SparkSession => U): U = { + withSparkSession(new SparkConf, f) + } + + def withSparkSession[U](conf: SparkConf, f: SparkSession => U): U = { + SparkSessionHolder.withSparkSession(conf, f) + } + + override def afterAll(): Unit = { + super.afterAll() + SparkSession.cleanupAnyExistingSession() + } + + def assertSame(expected: Any, actual: Any, epsilon: Double = 0.0, + path: List[String] = List.empty): Unit = { + def assertDoublesAreEqualWithinPercentage(expected: Double, + actual: Double, path: List[String]): Unit = { + if (expected != actual) { + if (expected != 0) { + val v = Math.abs((expected - actual) / expected) + assert(v <= epsilon, + s"$path: ABS($expected - $actual) / ABS($actual) == $v is not <= $epsilon ") + } else { + val v = Math.abs(expected - actual) + assert(v <= epsilon, s"$path: ABS($expected - $actual) == $v is not <= $epsilon ") + } + } + } + (expected, actual) match { + case (a: Float, b: Float) if a.isNaN && b.isNaN => + case (a: Double, b: 
Double) if a.isNaN && b.isNaN => + case (null, null) => + case (null, other) => fail(s"$path: expected is null, but actual is $other") + case (other, null) => fail(s"$path: expected is $other, but actual is null") + case (a: Array[_], b: Array[_]) => + assert(a.length == b.length, + s"$path: expected (${a.toList}) and actual (${b.toList}) lengths don't match") + a.indices.foreach { i => + assertSame(a(i), b(i), epsilon, path :+ i.toString) + } + case (a: Map[_, _], b: Map[_, _]) => + throw new IllegalStateException(s"Maps are not supported yet for comparison $a vs $b") + case (a: Iterable[_], b: Iterable[_]) => + assert(a.size == b.size, + s"$path: expected (${a.toList}) and actual (${b.toList}) lengths don't match") + var i = 0 + a.zip(b).foreach { + case (l, r) => + assertSame(l, r, epsilon, path :+ i.toString) + i += 1 + } + case (a: Product, b: Product) => + assertSame(a.productIterator.toSeq, b.productIterator.toSeq, epsilon, path) + case (a: Row, b: Row) => + assertSame(a.toSeq, b.toSeq, epsilon, path) + // 0.0 == -0.0, turn float/double to bits before comparison, to distinguish 0.0 and -0.0. + case (a: Double, b: Double) if epsilon <= 0 => + assert(java.lang.Double.doubleToRawLongBits(a) == java.lang.Double.doubleToRawLongBits(b), s"$path: $a != $b") + case (a: Double, b: Double) if epsilon > 0 => + assertDoublesAreEqualWithinPercentage(a, b, path) + case (a: Float, b: Float) if epsilon <= 0 => + assert(java.lang.Float.floatToRawIntBits(a) == java.lang.Float.floatToRawIntBits(b), s"$path: $a != $b") + case (a: Float, b: Float) if epsilon > 0 => + assertDoublesAreEqualWithinPercentage(a, b, path) + case (a, b) => + assert(a == b, s"$path: $a != $b") + } + } +} diff --git a/jdk-profiles/pom.xml b/jdk-profiles/pom.xml index ce10ccb0db3..689f3576b89 100644 --- a/jdk-profiles/pom.xml +++ b/jdk-profiles/pom.xml @@ -22,13 +22,13 @@ com.nvidia rapids-4-spark-parent_2.12 - 24.10.1 + 24.12.0 com.nvidia rapids-4-spark-jdk-profiles_2.12 pom Shim JDK Profiles - 24.10.1 + 24.12.0 jdk8 diff --git a/jenkins/Jenkinsfile-blossom.premerge-databricks b/jenkins/Jenkinsfile-blossom.premerge-databricks index 5b0a2bf1226..2f8b926898a 100644 --- a/jenkins/Jenkinsfile-blossom.premerge-databricks +++ b/jenkins/Jenkinsfile-blossom.premerge-databricks @@ -91,7 +91,7 @@ pipeline { // 'name' and 'value' only supprt literal string in the declarative Jenkins // Refer to Jenkins issue https://issues.jenkins.io/browse/JENKINS-62127 name 'DB_RUNTIME' - values '11.3', '12.2', '13.3' + values '11.3', '12.2', '13.3', '14.3' } } stages { @@ -175,20 +175,23 @@ void databricksBuild() { } } - stage("Test agaist $SPARK_MAJOR DB") { - script { - container('cpu') { - try { - withCredentials([file(credentialsId: 'SPARK_DATABRICKS_PRIVKEY', variable: 'DATABRICKS_PRIVKEY')]) { - def TEST_PARAMS = " -w $DATABRICKS_HOST -t $DATABRICKS_TOKEN -c $CLUSTER_ID -e TEST_MODE=$TEST_MODE" + - " -p $DATABRICKS_PRIVKEY -l ./jenkins/databricks/test.sh -v $BASE_SPARK_VERSION -d /home/ubuntu/test.sh" - if (params.SPARK_CONF) { - TEST_PARAMS += " -f ${params.SPARK_CONF}" + // TODO: Temporarily skip tests on Databricks 14.3 until the test failures are fixed + if (env.DB_RUNTIME != '14.3') { + stage("Test against $SPARK_MAJOR DB") { + script { + container('cpu') { + try { + withCredentials([file(credentialsId: 'SPARK_DATABRICKS_PRIVKEY', variable: 'DATABRICKS_PRIVKEY')]) { + def TEST_PARAMS = " -w $DATABRICKS_HOST -t $DATABRICKS_TOKEN -c $CLUSTER_ID -e TEST_MODE=$TEST_MODE" + + " -p $DATABRICKS_PRIVKEY -l ./jenkins/databricks/test.sh -v $BASE_SPARK_VERSION -d /home/ubuntu/test.sh" + if (params.SPARK_CONF)
{ + TEST_PARAMS += " -f ${params.SPARK_CONF}" + } + sh "python3 ./jenkins/databricks/run-tests.py $TEST_PARAMS" } - sh "python3 ./jenkins/databricks/run-tests.py $TEST_PARAMS" + } finally { + common.publishPytestResult(this, "${STAGE_NAME}") } - } finally { - common.publishPytestResult(this, "${STAGE_NAME}") } } } diff --git a/jenkins/databricks/build.sh b/jenkins/databricks/build.sh index 25bade91968..f6ff6e913b6 100755 --- a/jenkins/databricks/build.sh +++ b/jenkins/databricks/build.sh @@ -73,6 +73,14 @@ initialize() # the version of Spark used when we install the Databricks jars in .m2 BASE_SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS=${BASE_SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS:-$BASE_SPARK_VERSION} SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS=${BASE_SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS}-databricks + DBR_VER=$(cat /databricks/DBR_VERSION) + if [ $DBR_VER == '14.3' ]; then + DBR_VER=$(echo $DBR_VER | sed 's/\.//g') + # We are appending 143 in addition to the base spark version because Databricks 14.3 + # and Databricks 15.4 are both based on spark version 3.5.0 + BUILDVER="$BUILDVER$DBR_VER" + SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS="$SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS-$DBR_VER" + fi # pull normal Spark artifacts and ignore errors then install databricks jars, then build again. # this should match the databricks init script. diff --git a/jenkins/databricks/clusterutils.py b/jenkins/databricks/clusterutils.py index 2c31fd0e6be..8e1b272ef5a 100644 --- a/jenkins/databricks/clusterutils.py +++ b/jenkins/databricks/clusterutils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -85,6 +85,8 @@ def wait_for_cluster_start(workspace, clusterid, token, retries=20, printLoc=sys if current_state in ['INTERNAL_ERROR', 'SKIPPED', 'TERMINATED'] or p >= 60: if p >= retries: print("Waited %d times already, stopping" % p) + # Output the cluster ID to stdout so a calling script can get it easily + print(clusterid, file=sys.stdout) sys.exit(4) p = p + 1 print("Done starting cluster", file=printLoc) diff --git a/jenkins/databricks/create.py b/jenkins/databricks/create.py index fa2c129da7b..4354886e5b7 100644 --- a/jenkins/databricks/create.py +++ b/jenkins/databricks/create.py @@ -27,16 +27,16 @@ def main(): workspace = 'https://dbc-9ff9942e-a9c4.cloud.databricks.com' token = '' sshkey = '' - cluster_name = 'CI-GPU-databricks-24.10.1' + cluster_name = 'CI-GPU-databricks-24.12.0' idletime = 240 - runtime = '7.0.x-gpu-ml-scala2.12' + runtime = '13.3.x-gpu-ml-scala2.12' num_workers = 1 worker_type = 'g4dn.xlarge' driver_type = 'g4dn.xlarge' cloud_provider = 'aws' # comma separated init scripts in Databricks workspace, e.g. /foo,/bar,... 
init_scripts = '' - aws_zone='us-west-2c' + aws_zone='auto' try: diff --git a/jenkins/databricks/deploy.sh b/jenkins/databricks/deploy.sh index 6c89af57631..1079ee7dc6a 100755 --- a/jenkins/databricks/deploy.sh +++ b/jenkins/databricks/deploy.sh @@ -29,7 +29,12 @@ SCALA_VERSION=`mvn help:evaluate -q -pl dist -Dexpression=scala.binary.version - VERSION_NUM=${BASE_SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS//.} SPARK_VERSION_STR=spark$VERSION_NUM SPARK_PLUGIN_JAR_VERSION=`mvn help:evaluate -q -pl dist -Dexpression=project.version -DforceStdout` -DB_SHIM_NAME=${SPARK_VERSION_STR}db +# Append 143 into the db shim version because Databricks 14.3.x and 15.4.x are both based on spark version 3.5.0 +if [[ "$DB_RUNTIME" == "14.3"* ]]; then + DB_SHIM_NAME="${SPARK_VERSION_STR}db143" +else + DB_SHIM_NAME="${SPARK_VERSION_STR}db" +fi DBJARFPATH=./aggregator/target/${DB_SHIM_NAME}/rapids-4-spark-aggregator_$SCALA_VERSION-$SPARK_PLUGIN_JAR_VERSION-${DB_SHIM_NAME}.jar echo "Databricks jar is: $DBJARFPATH" MVN="mvn -Dmaven.wagon.http.retryHandler.count=3 -DretryFailedDeploymentCount=3" diff --git a/jenkins/databricks/init_cudf_udf.sh b/jenkins/databricks/init_cudf_udf.sh index deaf127cd5a..16b90b95c0e 100755 --- a/jenkins/databricks/init_cudf_udf.sh +++ b/jenkins/databricks/init_cudf_udf.sh @@ -20,7 +20,7 @@ set -ex -CUDF_VER=${CUDF_VER:-24.10} +CUDF_VER=${CUDF_VER:-24.12} CUDA_VER=${CUDA_VER:-11.8} # Need to explicitly add conda into PATH environment, to activate conda environment. diff --git a/jenkins/databricks/install_deps.py b/jenkins/databricks/install_deps.py index 7b77396b3f8..23453912827 100644 --- a/jenkins/databricks/install_deps.py +++ b/jenkins/databricks/install_deps.py @@ -42,6 +42,11 @@ def define_deps(spark_version, scala_version): elif spark_version.startswith('3.4'): spark_prefix = '----ws_3_4' mvn_prefix = '--mvn' + elif spark_version.startswith('3.5'): + spark_prefix = '----ws_3_5' + mvn_prefix = '--mvn' + else: + raise Exception(f"Unsupported Databricks version {spark_version}") spark_suffix = f'hive-{hive_version}__hadoop-{hadoop_version}_{scala_version}' @@ -69,7 +74,7 @@ def define_deps(spark_version, scala_version): Artifact('org.apache.spark', f'spark-core_{scala_version}', f'{spark_prefix}--core--core-{spark_suffix}_deploy.jar'), Artifact('org.apache.spark', f'spark-versions_{scala_version}', - f'spark--versions--*--shim_{scala_version}_deploy.jar'), + f'spark--versions--*--shim*_{scala_version}_deploy.jar'), Artifact('org.apache.spark', f'databricks-versions_{scala_version}', f'common--build-info--build-info-spark_*_{scala_version}_deploy.jar'), # Spark Hive Patches @@ -125,15 +130,17 @@ def define_deps(spark_version, scala_version): Artifact('com.fasterxml.jackson.core', 'jackson-annotations', f'{prefix_ws_sp_mvn_hadoop}--com.fasterxml.jackson.core--jackson-annotations--com.fasterxml.jackson.core__jackson-annotations__*.jar'), Artifact('org.apache.spark', f'spark-avro_{scala_version}', - f'{spark_prefix}--vendor--avro--avro-*.jar'), + f'{prefix_ws_sp_mvn_hadoop}--org.apache.avro--avro--org.apache.avro*.jar' if spark_version.startswith('3.5') else f'{spark_prefix}--vendor--avro--avro-*.jar'), Artifact('org.apache.avro', 'avro-mapred', f'{prefix_ws_sp_mvn_hadoop}--org.apache.avro--avro-mapred--org.apache.avro__avro-mapred__*.jar'), Artifact('org.apache.avro', 'avro', f'{prefix_ws_sp_mvn_hadoop}--org.apache.avro--avro--org.apache.avro__avro__*.jar'), + Artifact('com.github.luben', 'zstd-jni', +
f'{prefix_ws_sp_mvn_hadoop}--com.github.luben--zstd-jni--com.github.luben__zstd-jni__*.jar'), ] # Parquet - if spark_version.startswith('3.4'): + if spark_version.startswith('3.4') or spark_version.startswith('3.5'): deps += [ Artifact('org.apache.parquet', 'parquet-hadoop', f'{spark_prefix}--third_party--parquet-mr--parquet-hadoop--parquet-hadoop-shaded--*--libparquet-hadoop-internal.jar'), @@ -162,7 +169,7 @@ def define_deps(spark_version, scala_version): # log4j-core - if spark_version.startswith('3.3') or spark_version.startswith('3.4'): + if spark_version.startswith('3.3') or spark_version.startswith('3.4') or spark_version.startswith('3.5'): deps += Artifact('org.apache.logging.log4j', 'log4j-core', f'{prefix_ws_sp_mvn_hadoop}--org.apache.logging.log4j--log4j-core--org.apache.logging.log4j__log4j-core__*.jar'), @@ -172,7 +179,7 @@ def define_deps(spark_version, scala_version): f'{prefix_ws_sp_mvn_hadoop}--org.scala-lang.modules--scala-parser-combinators_{scala_version}-*.jar') ] - if spark_version.startswith('3.4'): + if spark_version.startswith('3.4') or spark_version.startswith('3.5'): deps += [ # Spark Internal Logging Artifact('org.apache.spark', f'spark-common-utils_{scala_version}', f'{spark_prefix}--common--utils--common-utils-hive-2.3__hadoop-3.2_2.12_deploy.jar'), @@ -180,6 +187,12 @@ def define_deps(spark_version, scala_version): Artifact('org.apache.spark', f'spark-sql-api_{scala_version}', f'{spark_prefix}--sql--api--sql-api-hive-2.3__hadoop-3.2_2.12_deploy.jar') ] + if spark_version.startswith('3.5'): + deps += [ + Artifact('org.scala-lang.modules', f'scala-collection-compat_{scala_version}', + f'{prefix_ws_sp_mvn_hadoop}--org.scala-lang.modules--scala-collection-compat_{scala_version}--org.scala-lang.modules__scala-collection-compat_{scala_version}__2.11.0.jar'), + Artifact('org.apache.avro', f'avro-connector', f'{spark_prefix}--connector--avro--avro-hive-2.3__hadoop-3.2_2.12_shaded---606136534--avro-unshaded-hive-2.3__hadoop-3.2_2.12_deploy.jar') + ] return deps diff --git a/jenkins/spark-nightly-build.sh b/jenkins/spark-nightly-build.sh index 7f7ba8d65a9..00735e02c84 100755 --- a/jenkins/spark-nightly-build.sh +++ b/jenkins/spark-nightly-build.sh @@ -19,6 +19,11 @@ set -ex . jenkins/version-def.sh +## MVN_OPT : maven options environment, e.g. MVN_OPT='-Dspark-rapids-jni.version=xxx' to specify spark-rapids-jni dependency's version. +MVN="mvn -Dmaven.wagon.http.retryHandler.count=3 -DretryFailedDeploymentCount=3 ${MVN_OPT} -Psource-javadoc" + +DIST_PL="dist" +DIST_PATH="$DIST_PL" # The path of the dist module is used only outside of the mvn cmd SCALA_BINARY_VER=${SCALA_BINARY_VER:-"2.12"} if [ $SCALA_BINARY_VER == "2.13" ]; then # Run scala2.13 build and test against JDK17 @@ -26,18 +31,14 @@ if [ $SCALA_BINARY_VER == "2.13" ]; then update-java-alternatives --set $JAVA_HOME java -version - cd scala2.13 - ln -sf ../jenkins jenkins + MVN="$MVN -f scala2.13/" + DIST_PATH="scala2.13/$DIST_PL" fi WORKSPACE=${WORKSPACE:-$(pwd)} ## export 'M2DIR' so that shims can get the correct Spark dependency info export M2DIR=${M2DIR:-"$WORKSPACE/.m2"} -## MVN_OPT : maven options environment, e.g. MVN_OPT='-Dspark-rapids-jni.version=xxx' to specify spark-rapids-jni dependency's version. 
-MVN="mvn -Dmaven.wagon.http.retryHandler.count=3 -DretryFailedDeploymentCount=3 ${MVN_OPT} -Psource-javadoc" - -DIST_PL="dist" function mvnEval { $MVN help:evaluate -q -pl $DIST_PL $MVN_URM_MIRROR -Prelease320 -Dmaven.repo.local=$M2DIR -DforceStdout -Dexpression=$1 } @@ -80,7 +81,7 @@ function distWithReducedPom { mvnCmd="deploy:deploy-file" if (( ${#CLASSIFIERS_ARR[@]} > 1 )); then # try move tmp artifacts back to target folder for simplifying separate release process - mv ${TMP_PATH}/${ART_ID}-${ART_VER}-*.jar ${DIST_PL}/target/ + mv ${TMP_PATH}/${ART_ID}-${ART_VER}-*.jar ${DIST_PATH}/target/ fi mvnExtraFlags="-Durl=${URM_URL}-local -DrepositoryId=snapshots -Dtypes=${DEPLOY_TYPES} -Dfiles=${DEPLOY_FILES} -Dclassifiers=${DEPLOY_CLASSIFIERS}" ;; @@ -166,7 +167,7 @@ if (( ${#CLASSIFIERS_ARR[@]} > 1 )); then # move artifacts to temp for deployment later artifactFile="${ART_ID}-${ART_VER}-${classifier}.jar" - mv ${DIST_PL}/target/${artifactFile} ${TMP_PATH}/ + mv ${DIST_PATH}/target/${artifactFile} ${TMP_PATH}/ # update deployment properties DEPLOY_TYPES="${DEPLOY_TYPES},jar" DEPLOY_FILES="${DEPLOY_FILES},${DIST_PL}/target/${artifactFile}" diff --git a/jenkins/spark-premerge-build.sh b/jenkins/spark-premerge-build.sh index e09558425e3..150de339e09 100755 --- a/jenkins/spark-premerge-build.sh +++ b/jenkins/spark-premerge-build.sh @@ -191,9 +191,6 @@ ci_scala213() { update-java-alternatives --set $JAVA_HOME java -version - cd scala2.13 - ln -sf ../jenkins jenkins - # Download a Scala 2.13 version of Spark prepare_spark 3.3.0 2.13 @@ -202,15 +199,15 @@ ci_scala213() { do echo "Spark version (Scala 2.13): $version" env -u SPARK_HOME \ - $MVN_CMD -U -B $MVN_URM_MIRROR -Dbuildver=$version clean install $MVN_BUILD_ARGS -Dpytest.TEST_TAGS='' + $MVN_CMD -f scala2.13/ -U -B $MVN_URM_MIRROR -Dbuildver=$version clean install $MVN_BUILD_ARGS -Dpytest.TEST_TAGS='' # Run filecache tests env -u SPARK_HOME SPARK_CONF=spark.rapids.filecache.enabled=true \ - $MVN_CMD -B $MVN_URM_MIRROR -Dbuildver=$version test -rf tests $MVN_BUILD_ARGS -Dpytest.TEST_TAGS='' \ + $MVN_CMD -f scala2.13/ -B $MVN_URM_MIRROR -Dbuildver=$version test -rf tests $MVN_BUILD_ARGS -Dpytest.TEST_TAGS='' \ -DwildcardSuites=org.apache.spark.sql.rapids.filecache.FileCacheIntegrationSuite done - $MVN_CMD -U -B $MVN_URM_MIRROR clean package $MVN_BUILD_ARGS -DskipTests=true - cd .. 
# Run integration tests in the project root dir to leverage test cases and resource files + $MVN_CMD -f scala2.13/ -U -B $MVN_URM_MIRROR clean package $MVN_BUILD_ARGS -DskipTests=true + export TEST_TAGS="not premerge_ci_1" export TEST_TYPE="pre-commit" # SPARK_HOME (and related) must be set to a Spark built with Scala 2.13 diff --git a/jenkins/spark-tests.sh b/jenkins/spark-tests.sh index e09fe78cbf7..65cc2975380 100755 --- a/jenkins/spark-tests.sh +++ b/jenkins/spark-tests.sh @@ -41,7 +41,7 @@ RAPIDS_TEST_JAR="$ARTF_ROOT/rapids-4-spark-integration-tests_${SCALA_BINARY_VER} export INCLUDE_SPARK_AVRO_JAR=${INCLUDE_SPARK_AVRO_JAR:-"true"} if [[ "${INCLUDE_SPARK_AVRO_JAR}" == "true" ]]; then - $WGET_CMD $PROJECT_REPO/org/apache/spark/spark-avro_$SCALA_BINARY_VER/$SPARK_VER/spark-avro_$SCALA_BINARY_VER-${SPARK_VER}.jar + $WGET_CMD $SPARK_REPO/org/apache/spark/spark-avro_$SCALA_BINARY_VER/$SPARK_VER/spark-avro_$SCALA_BINARY_VER-${SPARK_VER}.jar fi $WGET_CMD $PROJECT_TEST_REPO/com/nvidia/rapids-4-spark-integration-tests_$SCALA_BINARY_VER/$PROJECT_TEST_VER/rapids-4-spark-integration-tests_$SCALA_BINARY_VER-$PROJECT_TEST_VER-pytest.tar.gz @@ -94,7 +94,7 @@ $WGET_CMD $SPARK_REPO/org/apache/spark/$SPARK_VER/spark-$SPARK_VER-$BIN_HADOOP_V # Download parquet-hadoop jar for parquet-read encryption tests PARQUET_HADOOP_VER=`mvn help:evaluate -q -N -Dexpression=parquet.hadoop.version -DforceStdout -Dbuildver=${SHUFFLE_SPARK_SHIM/spark/}` if [[ "$(printf '%s\n' "1.12.0" "$PARQUET_HADOOP_VER" | sort -V | head -n1)" = "1.12.0" ]]; then - $WGET_CMD $PROJECT_REPO/org/apache/parquet/parquet-hadoop/$PARQUET_HADOOP_VER/parquet-hadoop-$PARQUET_HADOOP_VER-tests.jar + $WGET_CMD $SPARK_REPO/org/apache/parquet/parquet-hadoop/$PARQUET_HADOOP_VER/parquet-hadoop-$PARQUET_HADOOP_VER-tests.jar fi export SPARK_HOME="$ARTF_ROOT/spark-$SPARK_VER-$BIN_HADOOP_VER" diff --git a/jenkins/version-def.sh b/jenkins/version-def.sh index a56515751aa..7acdd6204a5 100755 --- a/jenkins/version-def.sh +++ b/jenkins/version-def.sh @@ -29,8 +29,8 @@ IFS=$PRE_IFS CUDA_CLASSIFIER=${CUDA_CLASSIFIER:-"cuda11"} CLASSIFIER=${CLASSIFIER:-"$CUDA_CLASSIFIER"} # default as CUDA_CLASSIFIER for compatibility -PROJECT_VER=${PROJECT_VER:-"24.10.1"} -PROJECT_TEST_VER=${PROJECT_TEST_VER:-"24.10.1"} +PROJECT_VER=${PROJECT_VER:-"24.12.0"} +PROJECT_TEST_VER=${PROJECT_TEST_VER:-"24.12.0"} SPARK_VER=${SPARK_VER:-"3.2.0"} SPARK_VER_213=${SPARK_VER_213:-"3.3.0"} # Make a best attempt to set the default value for the shuffle shim. diff --git a/pom.xml b/pom.xml index 4e356af5db5..470f198e5fb 100644 --- a/pom.xml +++ b/pom.xml @@ -23,7 +23,7 @@ rapids-4-spark-parent_2.12 RAPIDS Accelerator for Apache Spark Root Project The root project of the RAPIDS Accelerator for Apache Spark - 24.10.1 + 24.12.0 pom https://nvidia.github.io/spark-rapids/ @@ -105,6 +105,7 @@ ${spark320.version} ${spark320.version} 1.12.1 + rapids-4-spark-delta-20x delta-lake/delta-20x @@ -125,6 +126,7 @@ ${spark321.version} ${spark321.version} 1.12.2 + rapids-4-spark-delta-20x
delta-lake/delta-20x @@ -145,6 +147,7 @@ ${spark321cdh.version} ${spark321cdh.version} 1.10.1 + rapids-4-spark-delta-20x true + release350db143 + + + buildver + 350db143 + + + + + 3.4.4 + spark350db143 + ${spark350db143.version} + ${spark350db143.version} + 3.3.1 + true + 1.12.0 + rapids-4-spark-delta-${spark.version.classifier} + ${spark330.iceberg.version} + + + shim-deps/databricks + delta-lake/delta-spark350db143 + + release351 @@ -562,6 +644,7 @@ ${spark351.version} ${spark351.version} 1.13.1 + rapids-4-spark-delta-stub ${spark330.iceberg.version} 2.0.7
@@ -582,6 +665,28 @@ ${spark352.version} ${spark352.version} 1.13.1 + rapids-4-spark-delta-stub + ${spark330.iceberg.version} + 2.0.7 +
+ + delta-lake/delta-stub + + + + release353 + + + buildver + 353 + + + + 353 + ${spark353.version} + ${spark353.version} + 1.13.1 + rapids-4-spark-delta-stub ${spark330.iceberg.version} 2.0.7 @@ -603,6 +708,7 @@ ${spark400.version} ${spark400.version} 1.13.1 + rapids-4-spark-delta-stub ${spark330.iceberg.version} 2.0.7
@@ -707,7 +813,8 @@ - + + 350db143 . @@ -722,8 +829,8 @@ spark${buildver} cuda11 ${cuda.version} - 24.10.0 - 24.10.0 + 24.12.0-SNAPSHOT + 24.12.0-SNAPSHOT 2.12 2.8.0 incremental @@ -749,6 +856,15 @@ ${spark.version.classifier}.com.nvidia.shaded.spark none package + + + DEFINE_FOR_EVERY_SPARK_SHIM + + ${rapids.delta.artifactId1} + ${rapids.delta.artifactId1} true UTF-8 @@ -776,14 +892,17 @@ 3.4.1 3.4.2 3.4.3 + 3.4.4 3.3.0.3.3.7180.0-274 3.3.2.3.3.7190.0-91 3.3.0-databricks 3.3.2-databricks 3.4.1-databricks + 3.5.0-databricks-143 3.5.0 3.5.1 3.5.2 + 3.5.3 4.0.0-SNAPSHOT 3.12.4 @@ -1467,6 +1586,11 @@ This will force full Scala code rebuild in downstream modules. Minimum Maven version 3.6.x required [3.6,) + + At least one of rapids.delta.artifactId1, rapids.delta.artifactId2 ... is required in the POM profile "release${buildver}" + rapids.delta.artifactId1 + ^rapids-4-spark-delta-.* + Only Java 8, 11, and 17 are supported! @@ -1568,14 +1692,11 @@ This will force full Scala code rebuild in downstream modules. ${maven.scalastyle.skip} - - - - - - - - + + + + + Checking scalastyle for all modules using following paths: ${scalastyle.dirs} diff --git a/scala2.13/README.md b/scala2.13/README.md index 4096363cf52..f2e5200c9c7 100644 --- a/scala2.13/README.md +++ b/scala2.13/README.md @@ -25,8 +25,7 @@ You can use Maven to build the plugin. Like with Scala 2.12, we recommend buildi phase. ```shell script -cd scala2.13 -mvn verify +mvn verify -f scala2.13/ ``` After a successful build, the RAPIDS Accelerator jar will be in the `scala2.13/dist/target/` directory. @@ -45,7 +44,6 @@ You can also use the `buildall` script in the parent directory to build against of Apache Spark. ```shell script -cd .. ./build/buildall --profile=noSnapshotsScala213 ``` @@ -72,4 +70,4 @@ That way any new dependencies or other changes will be picked up in the Scala 2. You should be able to open the `scala2.13` directory directly in IntelliJ as a separate project. You can build and debug as normal, although there are slight differences in how to navigate the source. In particular, when you select a particular build profile, you will only be able to navigate the source used by modules that are included for that -spark version. \ No newline at end of file +spark version. diff --git a/scala2.13/aggregator/pom.xml b/scala2.13/aggregator/pom.xml index 6322f0f9701..053e9370deb 100644 --- a/scala2.13/aggregator/pom.xml +++ b/scala2.13/aggregator/pom.xml @@ -22,13 +22,13 @@ com.nvidia rapids-4-spark-jdk-profiles_2.13 - 24.10.1 + 24.12.0 ../jdk-profiles/pom.xml rapids-4-spark-aggregator_2.13 RAPIDS Accelerator for Apache Spark Aggregator Creates an aggregated shaded package of the RAPIDS plugin for Apache Spark - 24.10.1 + 24.12.0 aggregator @@ -71,6 +71,28 @@ ${spark-rapids-private.version} ${spark.version.classifier}
+ + + com.nvidia + ${rapids.delta.artifactId1}_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + + + com.nvidia + ${rapids.delta.artifactId2}_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + + + com.nvidia + ${rapids.delta.artifactId3}_${scala.binary.version} + ${project.version} + ${spark.version.classifier} +
@@ -262,507 +284,4 @@ - - - - release320 - - - - buildver - 320 - - - - - com.nvidia - rapids-4-spark-delta-20x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release321 - - - buildver - 321 - - - - - com.nvidia - rapids-4-spark-delta-20x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release321cdh - - - buildver - 321cdh - - - - - com.nvidia - rapids-4-spark-delta-20x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release322 - - - buildver - 322 - - - - - com.nvidia - rapids-4-spark-delta-20x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release323 - - - buildver - 323 - - - - - com.nvidia - rapids-4-spark-delta-20x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release324 - - - buildver - 324 - - - - - com.nvidia - rapids-4-spark-delta-20x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release330 - - - true - - - buildver - 330 - - - - - com.nvidia - rapids-4-spark-delta-21x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-22x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-23x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release330cdh - - - buildver - 330cdh - - - - - com.nvidia - rapids-4-spark-delta-21x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-22x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-23x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release332cdh - - - buildver - 332cdh - - - - - com.nvidia - rapids-4-spark-delta-21x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-22x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-23x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release330db - - - buildver - 330db - - - - - com.nvidia - rapids-4-spark-delta-spark330db_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release331 - - - buildver - 331 - - - - - com.nvidia - rapids-4-spark-delta-21x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-22x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-23x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release332 - - - buildver - 332 - - - - - com.nvidia - rapids-4-spark-delta-21x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-22x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-23x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release332db - - - buildver - 332db - - - - - com.nvidia - rapids-4-spark-delta-spark332db_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release341db - - - buildver - 341db - - - - - com.nvidia - rapids-4-spark-delta-spark341db_${scala.binary.version} - ${project.version} 
- ${spark.version.classifier} - - - - - release333 - - - buildver - 333 - - - - - com.nvidia - rapids-4-spark-delta-21x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-22x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-23x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release334 - - - buildver - 334 - - - - - com.nvidia - rapids-4-spark-delta-21x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-22x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - com.nvidia - rapids-4-spark-delta-23x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release340 - - - buildver - 340 - - - - - com.nvidia - rapids-4-spark-delta-24x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release341 - - - buildver - 341 - - - - - com.nvidia - rapids-4-spark-delta-24x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release342 - - - buildver - 342 - - - - - com.nvidia - rapids-4-spark-delta-24x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release343 - - - buildver - 343 - - - - - com.nvidia - rapids-4-spark-delta-24x_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release350 - - - buildver - 350 - - - - - com.nvidia - rapids-4-spark-delta-stub_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release351 - - - buildver - 351 - - - - - com.nvidia - rapids-4-spark-delta-stub_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - release352 - - - buildver - 352 - - - - - com.nvidia - rapids-4-spark-delta-stub_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - - release400 - - - buildver - 400 - - - - - com.nvidia - rapids-4-spark-delta-stub_${scala.binary.version} - ${project.version} - ${spark.version.classifier} - - - - - diff --git a/scala2.13/api_validation/pom.xml b/scala2.13/api_validation/pom.xml index 2b42c69e42b..7e73ed7ae72 100644 --- a/scala2.13/api_validation/pom.xml +++ b/scala2.13/api_validation/pom.xml @@ -22,11 +22,11 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.13 - 24.10.1 + 24.12.0 ../shim-deps/pom.xml rapids-4-spark-api-validation_2.13 - 24.10.1 + 24.12.0 api_validation diff --git a/scala2.13/datagen/pom.xml b/scala2.13/datagen/pom.xml index 8f74c241cab..b5f638aa9cf 100644 --- a/scala2.13/datagen/pom.xml +++ b/scala2.13/datagen/pom.xml @@ -21,18 +21,19 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.13 - 24.10.1 + 24.12.0 ../shim-deps/pom.xml datagen_2.13 Data Generator Tools for generating large amounts of data - 24.10.1 + 24.12.0 datagen **/* package + ${project.build.outputDirectory}/datagen-version-info.properties diff --git a/scala2.13/delta-lake/delta-20x/pom.xml b/scala2.13/delta-lake/delta-20x/pom.xml index 683c7d93d4c..0eee940615f 100644 --- a/scala2.13/delta-lake/delta-20x/pom.xml +++ b/scala2.13/delta-lake/delta-20x/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.13 - 24.10.1 + 24.12.0 ../../jdk-profiles/pom.xml rapids-4-spark-delta-20x_2.13 RAPIDS Accelerator for Apache Spark Delta Lake 2.0.x Support Delta Lake 2.0.x support for the RAPIDS Accelerator for Apache Spark - 24.10.1 + 24.12.0 ../delta-lake/delta-20x diff --git 
a/scala2.13/delta-lake/delta-21x/pom.xml b/scala2.13/delta-lake/delta-21x/pom.xml index 48a61e6be3e..2b457c80aea 100644 --- a/scala2.13/delta-lake/delta-21x/pom.xml +++ b/scala2.13/delta-lake/delta-21x/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.13 - 24.10.1 + 24.12.0 ../../jdk-profiles/pom.xml rapids-4-spark-delta-21x_2.13 RAPIDS Accelerator for Apache Spark Delta Lake 2.1.x Support Delta Lake 2.1.x support for the RAPIDS Accelerator for Apache Spark - 24.10.1 + 24.12.0 ../delta-lake/delta-21x diff --git a/scala2.13/delta-lake/delta-22x/pom.xml b/scala2.13/delta-lake/delta-22x/pom.xml index 95ce774c349..42e5685444c 100644 --- a/scala2.13/delta-lake/delta-22x/pom.xml +++ b/scala2.13/delta-lake/delta-22x/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.13 - 24.10.1 + 24.12.0 ../../jdk-profiles/pom.xml rapids-4-spark-delta-22x_2.13 RAPIDS Accelerator for Apache Spark Delta Lake 2.2.x Support Delta Lake 2.2.x support for the RAPIDS Accelerator for Apache Spark - 24.10.1 + 24.12.0 ../delta-lake/delta-22x diff --git a/scala2.13/delta-lake/delta-23x/pom.xml b/scala2.13/delta-lake/delta-23x/pom.xml index e2773a45775..05a38a75ce6 100644 --- a/scala2.13/delta-lake/delta-23x/pom.xml +++ b/scala2.13/delta-lake/delta-23x/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-parent_2.13 - 24.10.1 + 24.12.0 ../../pom.xml rapids-4-spark-delta-23x_2.13 RAPIDS Accelerator for Apache Spark Delta Lake 2.3.x Support Delta Lake 2.3.x support for the RAPIDS Accelerator for Apache Spark - 24.10.1 + 24.12.0 ../delta-lake/delta-23x diff --git a/scala2.13/delta-lake/delta-24x/pom.xml b/scala2.13/delta-lake/delta-24x/pom.xml index 71db006ad1a..c1b106d0f55 100644 --- a/scala2.13/delta-lake/delta-24x/pom.xml +++ b/scala2.13/delta-lake/delta-24x/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.13 - 24.10.1 + 24.12.0 ../../jdk-profiles/pom.xml rapids-4-spark-delta-24x_2.13 RAPIDS Accelerator for Apache Spark Delta Lake 2.4.x Support Delta Lake 2.4.x support for the RAPIDS Accelerator for Apache Spark - 24.10.1 + 24.12.0 ../delta-lake/delta-24x diff --git a/scala2.13/delta-lake/delta-spark330db/pom.xml b/scala2.13/delta-lake/delta-spark330db/pom.xml index fc9fa866490..2fa49fe5847 100644 --- a/scala2.13/delta-lake/delta-spark330db/pom.xml +++ b/scala2.13/delta-lake/delta-spark330db/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.13 - 24.10.1 + 24.12.0 ../../shim-deps/pom.xml rapids-4-spark-delta-spark330db_2.13 RAPIDS Accelerator for Apache Spark Databricks 11.3 Delta Lake Support Databricks 11.3 Delta Lake support for the RAPIDS Accelerator for Apache Spark - 24.10.1 + 24.12.0 ../delta-lake/delta-spark330db diff --git a/scala2.13/delta-lake/delta-spark332db/pom.xml b/scala2.13/delta-lake/delta-spark332db/pom.xml index 1b08c59eabe..a8cae9d2f82 100644 --- a/scala2.13/delta-lake/delta-spark332db/pom.xml +++ b/scala2.13/delta-lake/delta-spark332db/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.13 - 24.10.1 + 24.12.0 ../../shim-deps/pom.xml rapids-4-spark-delta-spark332db_2.13 RAPIDS Accelerator for Apache Spark Databricks 12.2 Delta Lake Support Databricks 12.2 Delta Lake support for the RAPIDS Accelerator for Apache Spark - 24.10.1 + 24.12.0 ../delta-lake/delta-spark332db diff --git a/scala2.13/delta-lake/delta-spark341db/pom.xml b/scala2.13/delta-lake/delta-spark341db/pom.xml index cb48f620886..9583419dc7c 100644 --- a/scala2.13/delta-lake/delta-spark341db/pom.xml +++ 
b/scala2.13/delta-lake/delta-spark341db/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.13 - 24.10.1 + 24.12.0 ../../shim-deps/pom.xml rapids-4-spark-delta-spark341db_2.13 RAPIDS Accelerator for Apache Spark Databricks 13.3 Delta Lake Support Databricks 13.3 Delta Lake support for the RAPIDS Accelerator for Apache Spark - 24.10.1 + 24.12.0 false diff --git a/scala2.13/delta-lake/delta-spark350db143/pom.xml b/scala2.13/delta-lake/delta-spark350db143/pom.xml new file mode 100644 index 00000000000..da47b99455c --- /dev/null +++ b/scala2.13/delta-lake/delta-spark350db143/pom.xml @@ -0,0 +1,85 @@ + + + + 4.0.0 + + + com.nvidia + rapids-4-spark-shim-deps-parent_2.13 + 24.12.0 + ../../shim-deps/pom.xml + + + rapids-4-spark-delta-spark350db143_2.13 + RAPIDS Accelerator for Apache Spark Databricks 14.3 Delta Lake Support + Databricks 14.3 Delta Lake support for the RAPIDS Accelerator for Apache Spark + 24.12.0 + + + false + **/* + package + + + + + org.roaringbitmap + RoaringBitmap + + + com.nvidia + rapids-4-spark-sql_${scala.binary.version} + ${project.version} + ${spark.version.classifier} + provided + + + + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-common-sources + generate-sources + + add-source + + + + ${project.basedir}/../common/src/main/scala + ${project.basedir}/../common/src/main/databricks/scala + + + + + + + net.alchim31.maven + scala-maven-plugin + + + org.apache.rat + apache-rat-plugin + + + + diff --git a/scala2.13/delta-lake/delta-stub/pom.xml b/scala2.13/delta-lake/delta-stub/pom.xml index 5698b32e431..989450c3e7e 100644 --- a/scala2.13/delta-lake/delta-stub/pom.xml +++ b/scala2.13/delta-lake/delta-stub/pom.xml @@ -22,14 +22,14 @@ com.nvidia rapids-4-spark-jdk-profiles_2.13 - 24.10.1 + 24.12.0 ../../jdk-profiles/pom.xml rapids-4-spark-delta-stub_2.13 RAPIDS Accelerator for Apache Spark Delta Lake Stub Delta Lake stub for the RAPIDS Accelerator for Apache Spark - 24.10.1 + 24.12.0 ../delta-lake/delta-stub diff --git a/scala2.13/dist/pom.xml b/scala2.13/dist/pom.xml index 3634bf3b78d..d11161e9d7e 100644 --- a/scala2.13/dist/pom.xml +++ b/scala2.13/dist/pom.xml @@ -22,13 +22,13 @@ com.nvidia rapids-4-spark-jdk-profiles_2.13 - 24.10.1 + 24.12.0 ../jdk-profiles/pom.xml rapids-4-spark_2.13 RAPIDS Accelerator for Apache Spark Distribution Creates the distribution package of the RAPIDS plugin for Apache Spark - 24.10.1 + 24.12.0 com.nvidia diff --git a/scala2.13/integration_tests/pom.xml b/scala2.13/integration_tests/pom.xml index 1b9d2f2ea64..0f82e0b9186 100644 --- a/scala2.13/integration_tests/pom.xml +++ b/scala2.13/integration_tests/pom.xml @@ -22,11 +22,11 @@ com.nvidia rapids-4-spark-shim-deps-parent_2.13 - 24.10.1 + 24.12.0 ../shim-deps/pom.xml rapids-4-spark-integration-tests_2.13 - 24.10.1 + 24.12.0 integration_tests diff --git a/scala2.13/jdk-profiles/pom.xml b/scala2.13/jdk-profiles/pom.xml index febb2bf230a..808031d488b 100644 --- a/scala2.13/jdk-profiles/pom.xml +++ b/scala2.13/jdk-profiles/pom.xml @@ -22,13 +22,13 @@ com.nvidia rapids-4-spark-parent_2.13 - 24.10.1 + 24.12.0 com.nvidia rapids-4-spark-jdk-profiles_2.13 pom Shim JDK Profiles - 24.10.1 + 24.12.0 jdk8 diff --git a/scala2.13/pom.xml b/scala2.13/pom.xml index d920a059d85..baebe599dc1 100644 --- a/scala2.13/pom.xml +++ b/scala2.13/pom.xml @@ -23,7 +23,7 @@ rapids-4-spark-parent_2.13 RAPIDS Accelerator for Apache Spark Root Project The root project of the RAPIDS Accelerator for Apache Spark - 24.10.1 + 24.12.0 pom https://nvidia.github.io/spark-rapids/ @@ -105,6 +105,7 @@
${spark320.version} ${spark320.version} 1.12.1 + rapids-4-spark-delta-20x delta-lake/delta-20x @@ -125,6 +126,7 @@ ${spark321.version} ${spark321.version} 1.12.2 + rapids-4-spark-delta-20x delta-lake/delta-20x @@ -145,6 +147,7 @@ ${spark321cdh.version} ${spark321cdh.version} 1.10.1 + rapids-4-spark-delta-20x true --> + release350db143 + + + buildver + 350db143 + + + + + 3.4.4 + spark350db143 + ${spark350db143.version} + ${spark350db143.version} + 3.3.1 + true + 1.12.0 + rapids-4-spark-delta-${spark.version.classifier} + ${spark330.iceberg.version} + + + shim-deps/databricks + delta-lake/delta-spark350db143 + + release351 @@ -562,6 +644,7 @@ ${spark351.version} ${spark351.version} 1.13.1 + rapids-4-spark-delta-stub ${spark330.iceberg.version} 2.0.7 @@ -582,6 +665,28 @@ ${spark352.version} ${spark352.version} 1.13.1 + rapids-4-spark-delta-stub + ${spark330.iceberg.version} + 2.0.7 + + + delta-lake/delta-stub + + + + release353 + + + buildver + 353 + + + + 353 + ${spark353.version} + ${spark353.version} + 1.13.1 + rapids-4-spark-delta-stub ${spark330.iceberg.version} 2.0.7 @@ -603,6 +708,7 @@ ${spark400.version} ${spark400.version} 1.13.1 + rapids-4-spark-delta-stub ${spark330.iceberg.version} 2.0.7 @@ -707,7 +813,8 @@ - + + 350db143 . @@ -722,8 +829,8 @@ spark${buildver} cuda11 ${cuda.version} - 24.10.0 - 24.10.0 + 24.12.0-SNAPSHOT + 24.12.0-SNAPSHOT 2.13 2.8.0 incremental @@ -749,6 +856,15 @@ ${spark.version.classifier}.com.nvidia.shaded.spark none package + + + DEFINE_FOR_EVERY_SPARK_SHIM + + ${rapids.delta.artifactId1} + ${rapids.delta.artifactId1} true UTF-8 @@ -776,14 +892,17 @@ 3.4.1 3.4.2 3.4.3 + 3.4.4 3.3.0.3.3.7180.0-274 3.3.2.3.3.7190.0-91 3.3.0-databricks 3.3.2-databricks 3.4.1-databricks + 3.5.0-databricks-143 3.5.0 3.5.1 3.5.2 + 3.5.3 4.0.0-SNAPSHOT 3.12.4 @@ -1467,6 +1586,11 @@ This will force full Scala code rebuild in downstream modules. Minimum Maven version 3.6.x required [3.6,) + + At least one of rapids.delta.artifactId1, rapids.delta.artifactId2 ... is required in the POM profile "release${buildver}" + rapids.delta.artifactId1 + ^rapids-4-spark-delta-.* +