diff --git a/.github/workflows/blossom-ci.yml b/.github/workflows/blossom-ci.yml index b3cbbb6ad14..4b8071303c1 100644 --- a/.github/workflows/blossom-ci.yml +++ b/.github/workflows/blossom-ci.yml @@ -33,45 +33,49 @@ jobs: args: ${{ env.args }} # This job only runs for pull request comments - if: contains( '\ - abellina,\ - anfeng,\ - firestarman,\ - GaryShen2008,\ - jlowe,\ - kuhushukla,\ - mythrocks,\ - nartal1,\ - nvdbaranec,\ - NvTimLiu,\ - razajafri,\ - revans2,\ - rwlee,\ - sameerz,\ - tgravescs,\ - wbo4958,\ - wjxiz1992,\ - sperlingxx,\ - hyperbolic2346,\ - gerashegalov,\ - ttnghia,\ - nvliyuan,\ - res-life,\ - HaoYang670,\ - NVnavkumar,\ - amahussein,\ - mattahrens,\ - YanxuanLiu,\ - cindyyuanjiang,\ - thirtiseven,\ - winningsix,\ - viadea,\ - yinqingh,\ - parthosa,\ - liurenjie1024,\ - binmahone,\ - zpuller,\ - ', format('{0},', github.actor)) && github.event.comment.body == 'build' + if: | + github.event.comment.body == 'build' && + ( + github.actor == 'abellina' || + github.actor == 'anfeng' || + github.actor == 'firestarman' || + github.actor == 'GaryShen2008' || + github.actor == 'jlowe' || + github.actor == 'kuhushukla' || + github.actor == 'mythrocks' || + github.actor == 'nartal1' || + github.actor == 'nvdbaranec' || + github.actor == 'NvTimLiu' || + github.actor == 'razajafri' || + github.actor == 'revans2' || + github.actor == 'rwlee' || + github.actor == 'sameerz' || + github.actor == 'tgravescs' || + github.actor == 'wbo4958' || + github.actor == 'wjxiz1992' || + github.actor == 'sperlingxx' || + github.actor == 'hyperbolic2346' || + github.actor == 'gerashegalov' || + github.actor == 'ttnghia' || + github.actor == 'nvliyuan' || + github.actor == 'res-life' || + github.actor == 'HaoYang670' || + github.actor == 'NVnavkumar' || + github.actor == 'amahussein' || + github.actor == 'mattahrens' || + github.actor == 'YanxuanLiu' || + github.actor == 'cindyyuanjiang' || + github.actor == 'thirtiseven' || + github.actor == 'winningsix' || + github.actor == 'viadea' || + github.actor == 'yinqingh' || + github.actor == 'parthosa' || + github.actor == 'liurenjie1024' || + github.actor == 'binmahone' || + github.actor == 'zpuller' || + github.actor == 'pxLi' || + github.actor == 'Feng-Jiang28' + ) steps: - name: Check if comment is issued by authorized person run: blossom-ci diff --git a/CHANGELOG.md b/CHANGELOG.md index 0340afa5931..788fed0a98f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,121 @@ # Change log -Generated on 2024-05-20 +Generated on 2024-06-13 + +## Release 24.06 + +### Features +||| +|:---|:---| +|[#10850](https://github.com/NVIDIA/spark-rapids/issues/10850)|[FEA] Refine the test framework introduced in #10745| +|[#6969](https://github.com/NVIDIA/spark-rapids/issues/6969)|[FEA] Support parse_url | +|[#10496](https://github.com/NVIDIA/spark-rapids/issues/10496)|[FEA] Drop support for CentOS7| +|[#10760](https://github.com/NVIDIA/spark-rapids/issues/10760)|[FEA]Support ArrayFilter| +|[#10721](https://github.com/NVIDIA/spark-rapids/issues/10721)|[FEA] Dump the complete set of build-info properties to the Spark eventLog| +|[#10666](https://github.com/NVIDIA/spark-rapids/issues/10666)|[FEA] Create Spark 3.4.3 shim| + +### Performance +||| +|:---|:---| +|[#8963](https://github.com/NVIDIA/spark-rapids/issues/8963)|[FEA] Use custom kernel for parse_url| +|[#10817](https://github.com/NVIDIA/spark-rapids/issues/10817)|[FOLLOW ON] Combining regex parsing in transpiling and regex rewrite in `rlike`| 
+|[#10821](https://github.com/NVIDIA/spark-rapids/issues/10821)|Rewrite `pattern[A-B]{X,Y}` (a pattern string followed by X to Y chars in range A - B) in `RLIKE` to a custom kernel| + +### Bugs Fixed +||| +|:---|:---| +|[#10928](https://github.com/NVIDIA/spark-rapids/issues/10928)|[BUG] 24.06 test_conditional_with_side_effects_case_when test failed on Scala 2.13 with DATAGEN_SEED=1716656294| +|[#10941](https://github.com/NVIDIA/spark-rapids/issues/10941)|[BUG] Failed to build on databricks due to GpuOverrides.scala:4264: not found: type GpuSubqueryBroadcastMeta| +|[#10902](https://github.com/NVIDIA/spark-rapids/issues/10902)|Spark UT failed: SPARK-37360: Timestamp type inference for a mix of TIMESTAMP_NTZ and TIMESTAMP_LTZ| +|[#10899](https://github.com/NVIDIA/spark-rapids/issues/10899)|[BUG] format_number Spark UT failed because Type conversion is not allowed| +|[#10913](https://github.com/NVIDIA/spark-rapids/issues/10913)|[BUG] rlike with empty pattern failed with 'NoSuchElementException' when enabling regex rewrite| +|[#10774](https://github.com/NVIDIA/spark-rapids/issues/10774)|[BUG] Issues found by Spark UT Framework on RapidsRegexpExpressionsSuite| +|[#10606](https://github.com/NVIDIA/spark-rapids/issues/10606)|[BUG] Update Plugin to use the new `getPartitionedFile` method| +|[#10806](https://github.com/NVIDIA/spark-rapids/issues/10806)|[BUG] orc_write_test.py::test_write_round_trip_corner failed with DATAGEN_SEED=1715517863| +|[#10831](https://github.com/NVIDIA/spark-rapids/issues/10831)|[BUG] Failed to read data from iceberg| +|[#10810](https://github.com/NVIDIA/spark-rapids/issues/10810)|[BUG] NPE when running `ParseUrl` tests in `RapidsStringExpressionsSuite`| +|[#10797](https://github.com/NVIDIA/spark-rapids/issues/10797)|[BUG] udf_test test_single_aggregate_udf, test_group_aggregate_udf and test_group_apply_udf_more_types failed on DB 13.3| +|[#10719](https://github.com/NVIDIA/spark-rapids/issues/10719)|[BUG] test_exact_percentile_groupby FAILED: hash_aggregate_test.py::test_exact_percentile_groupby with DATAGEN seed 1713362217| +|[#10738](https://github.com/NVIDIA/spark-rapids/issues/10738)|[BUG] test_exact_percentile_groupby_partial_fallback_to_cpu failed with DATAGEN_SEED=1713928179| +|[#10768](https://github.com/NVIDIA/spark-rapids/issues/10768)|[DOC] Dead links with tools pages| +|[#10751](https://github.com/NVIDIA/spark-rapids/issues/10751)|[BUG] Cascaded Pandas UDFs not working as expected on Databricks when plugin is enabled| +|[#10318](https://github.com/NVIDIA/spark-rapids/issues/10318)|[BUG] `fs.azure.account.keyInvalid` configuration issue while reading from Unity Catalog Tables on Azure DB| +|[#10722](https://github.com/NVIDIA/spark-rapids/issues/10722)|[BUG] "Could not find any rapids-4-spark jars in classpath" error when debugging UT in IDEA| +|[#10724](https://github.com/NVIDIA/spark-rapids/issues/10724)|[BUG] Failed to convert string with invisible characters to float| +|[#10633](https://github.com/NVIDIA/spark-rapids/issues/10633)|[BUG] ScanJson and JsonToStructs can give almost random errors| +|[#10659](https://github.com/NVIDIA/spark-rapids/issues/10659)|[BUG] from_json ArrayIndexOutOfBoundsException in 24.02| +|[#10656](https://github.com/NVIDIA/spark-rapids/issues/10656)|[BUG] Databricks cache tests failing with host memory OOM| + +### PRs +||| +|:---|:---| +|[#11052](https://github.com/NVIDIA/spark-rapids/pull/11052)|Add spark343 shim for scala2.13 dist jar| +|[#10981](https://github.com/NVIDIA/spark-rapids/pull/10981)|Update latest changelog [skip ci]| 
+|[#10984](https://github.com/NVIDIA/spark-rapids/pull/10984)|[DOC] Update docs for 24.06.0 release [skip ci]| +|[#10974](https://github.com/NVIDIA/spark-rapids/pull/10974)|Update rapids JNI and private dependency to 24.06.0| +|[#10947](https://github.com/NVIDIA/spark-rapids/pull/10947)|Prevent contains-PrefixRange optimization if not preceded by wildcards| +|[#10934](https://github.com/NVIDIA/spark-rapids/pull/10934)|Revert "Add Support for Multiple Filtering Keys for Subquery Broadcast "| +|[#10870](https://github.com/NVIDIA/spark-rapids/pull/10870)|Add support for self-contained profiling| +|[#10903](https://github.com/NVIDIA/spark-rapids/pull/10903)|Use upper case for LEGACY_TIME_PARSER_POLICY to fix a spark UT| +|[#10900](https://github.com/NVIDIA/spark-rapids/pull/10900)|Fix type convert error in format_number scalar input| +|[#10868](https://github.com/NVIDIA/spark-rapids/pull/10868)|Disable default cuDF pinned pool| +|[#10914](https://github.com/NVIDIA/spark-rapids/pull/10914)|Fix NoSuchElementException when rlike with empty pattern| +|[#10858](https://github.com/NVIDIA/spark-rapids/pull/10858)|Add Support for Multiple Filtering Keys for Subquery Broadcast | +|[#10861](https://github.com/NVIDIA/spark-rapids/pull/10861)|refine ut framework including Part 1 and Part 2| +|[#10872](https://github.com/NVIDIA/spark-rapids/pull/10872)|[DOC] ignore released plugin links to reduce the bother info [skip ci]| +|[#10839](https://github.com/NVIDIA/spark-rapids/pull/10839)|Replace anonymous classes for SortOrder and FIlterExec overrides| +|[#10873](https://github.com/NVIDIA/spark-rapids/pull/10873)|Auto merge PRs to branch-24.08 from branch-24.06 [skip ci]| +|[#10860](https://github.com/NVIDIA/spark-rapids/pull/10860)|[Spark 4.0] Account for `PartitionedFileUtil.getPartitionedFile` signature change.| +|[#10822](https://github.com/NVIDIA/spark-rapids/pull/10822)|Rewrite regex pattern `literal[a-b]{x}` to custom kernel in rlike| +|[#10833](https://github.com/NVIDIA/spark-rapids/pull/10833)|Filter out unused json_path tokens| +|[#10855](https://github.com/NVIDIA/spark-rapids/pull/10855)|Fix auto merge conflict 10845 [[skip ci]]| +|[#10826](https://github.com/NVIDIA/spark-rapids/pull/10826)|Add NVTX ranges to identify Spark stages and tasks| +|[#10846](https://github.com/NVIDIA/spark-rapids/pull/10846)|Update latest changelog [skip ci]| +|[#10836](https://github.com/NVIDIA/spark-rapids/pull/10836)|Catch exceptions when trying to examine Iceberg scan for metadata queries| +|[#10824](https://github.com/NVIDIA/spark-rapids/pull/10824)|Support zstd for GPU shuffle compression| +|[#10828](https://github.com/NVIDIA/spark-rapids/pull/10828)|Added DateTimeUtilsShims [Databricks]| +|[#10829](https://github.com/NVIDIA/spark-rapids/pull/10829)|Fix `Inheritance Shadowing` to add support for Spark 4.0.0| +|[#10811](https://github.com/NVIDIA/spark-rapids/pull/10811)|Fix NPE in GpuParseUrl for null keys.| +|[#10723](https://github.com/NVIDIA/spark-rapids/pull/10723)|Implement chunked ORC reader| +|[#10715](https://github.com/NVIDIA/spark-rapids/pull/10715)|Rewrite some rlike expression to StartsWith/Contains| +|[#10820](https://github.com/NVIDIA/spark-rapids/pull/10820)|workaround #10801 temporally| +|[#10812](https://github.com/NVIDIA/spark-rapids/pull/10812)|Replace ThreadPoolExecutor creation with ThreadUtils API| +|[#10816](https://github.com/NVIDIA/spark-rapids/pull/10816)|Fix a test error for DB13.3| +|[#10813](https://github.com/NVIDIA/spark-rapids/pull/10813)|Fix the errors for Pandas UDF tests on DB13.3| 
+|[#10795](https://github.com/NVIDIA/spark-rapids/pull/10795)|Remove fixed seed for exact `percentile` integration tests| +|[#10805](https://github.com/NVIDIA/spark-rapids/pull/10805)|Drop Support for CentOS 7| +|[#10800](https://github.com/NVIDIA/spark-rapids/pull/10800)|Add number normalization test and address followup for getJsonObject| +|[#10796](https://github.com/NVIDIA/spark-rapids/pull/10796)|fixing build break on DBR| +|[#10791](https://github.com/NVIDIA/spark-rapids/pull/10791)|Fix auto merge conflict 10779 [skip ci]| +|[#10636](https://github.com/NVIDIA/spark-rapids/pull/10636)|Update actions version [skip ci]| +|[#10743](https://github.com/NVIDIA/spark-rapids/pull/10743)|initial PR for the framework reusing Vanilla Spark's unit tests| +|[#10767](https://github.com/NVIDIA/spark-rapids/pull/10767)|Add rows-only batches support to RebatchingRoundoffIterator| +|[#10763](https://github.com/NVIDIA/spark-rapids/pull/10763)|Add in the GpuArrayFilter command| +|[#10766](https://github.com/NVIDIA/spark-rapids/pull/10766)|Fix dead links related to tools documentation [skip ci]| +|[#10644](https://github.com/NVIDIA/spark-rapids/pull/10644)|Add logging to Integration test runs in local and local-cluster mode| +|[#10756](https://github.com/NVIDIA/spark-rapids/pull/10756)|Fix Authorization Failure While Reading Tables From Unity Catalog| +|[#10752](https://github.com/NVIDIA/spark-rapids/pull/10752)|Add SparkRapidsBuildInfoEvent to the event log| +|[#10754](https://github.com/NVIDIA/spark-rapids/pull/10754)|Substitute whoami for $USER| +|[#10755](https://github.com/NVIDIA/spark-rapids/pull/10755)|[DOC] Update README for prioritize-commits script [skip ci]| +|[#10728](https://github.com/NVIDIA/spark-rapids/pull/10728)|Let big data gen set nullability recursively| +|[#10740](https://github.com/NVIDIA/spark-rapids/pull/10740)|Use parse_url kernel for PATH parsing| +|[#10734](https://github.com/NVIDIA/spark-rapids/pull/10734)|Add short circuit path for get-json-object when there is separate wildcard path| +|[#10725](https://github.com/NVIDIA/spark-rapids/pull/10725)|Initial definition for Spark 4.0.0 shim| +|[#10635](https://github.com/NVIDIA/spark-rapids/pull/10635)|Use new getJsonObject kernel for json_tuple| +|[#10739](https://github.com/NVIDIA/spark-rapids/pull/10739)|Use fixed seed for some random failed tests| +|[#10720](https://github.com/NVIDIA/spark-rapids/pull/10720)|Add Shims for Spark 3.4.3| +|[#10716](https://github.com/NVIDIA/spark-rapids/pull/10716)|Remove the mixedType config for JSON as it has no downsides any longer| +|[#10733](https://github.com/NVIDIA/spark-rapids/pull/10733)|Fix "Could not find any rapids-4-spark jars in classpath" error when debugging UT in IDEA| +|[#10718](https://github.com/NVIDIA/spark-rapids/pull/10718)|Change parameters for memory limit in Parquet chunked reader| +|[#10292](https://github.com/NVIDIA/spark-rapids/pull/10292)|Upgrade to UCX 1.16.0| +|[#10709](https://github.com/NVIDIA/spark-rapids/pull/10709)|Removing some authorizations for departed users [skip ci]| +|[#10726](https://github.com/NVIDIA/spark-rapids/pull/10726)|Append new authorized user to blossom-ci whitelist [skip ci]| +|[#10708](https://github.com/NVIDIA/spark-rapids/pull/10708)|Updated dump tool to verify get_json_object| +|[#10706](https://github.com/NVIDIA/spark-rapids/pull/10706)|Fix auto merge conflict 10704 [skip ci]| +|[#10675](https://github.com/NVIDIA/spark-rapids/pull/10675)|Fix merge conflict with branch-24.04 [skip ci]| 
+|[#10678](https://github.com/NVIDIA/spark-rapids/pull/10678)|Append new authorized user to blossom-ci whitelist [skip ci]| +|[#10662](https://github.com/NVIDIA/spark-rapids/pull/10662)|Audit script - Check commits from shuffle and storage directories [skip ci]| +|[#10655](https://github.com/NVIDIA/spark-rapids/pull/10655)|Update rapids jni/private dependency to 24.06| +|[#10652](https://github.com/NVIDIA/spark-rapids/pull/10652)|Substitute murmurHash32 for spark32BitMurmurHash3| ## Release 24.04 @@ -85,8 +201,12 @@ Generated on 2024-05-20 ### PRs ||| |:---|:---| +|[#10844](https://github.com/NVIDIA/spark-rapids/pull/10844)|Update rapids private dependency to 24.04.3| +|[#10788](https://github.com/NVIDIA/spark-rapids/pull/10788)|[DOC] Update archive page for v24.04.1 [skip ci]| +|[#10784](https://github.com/NVIDIA/spark-rapids/pull/10784)|Update latest changelog [skip ci]| |[#10782](https://github.com/NVIDIA/spark-rapids/pull/10782)|Update latest changelog [skip ci]| |[#10780](https://github.com/NVIDIA/spark-rapids/pull/10780)|[DOC]Update download page for v24.04.1 [skip ci]| +|[#10778](https://github.com/NVIDIA/spark-rapids/pull/10778)|Update version to 24.04.1-SNAPSHOT| |[#10777](https://github.com/NVIDIA/spark-rapids/pull/10777)|Update rapids JNI dependency: private to 24.04.2| |[#10683](https://github.com/NVIDIA/spark-rapids/pull/10683)|Update latest changelog [skip ci]| |[#10681](https://github.com/NVIDIA/spark-rapids/pull/10681)|Update rapids JNI dependency to 24.04.0, private to 24.04.1| @@ -172,307 +292,7 @@ Generated on 2024-05-20 |[#10348](https://github.com/NVIDIA/spark-rapids/pull/10348)|Remove redundant joinOutputRows metric| |[#10321](https://github.com/NVIDIA/spark-rapids/pull/10321)|Bump up dependency version to 24.04.0-SNAPSHOT| |[#10330](https://github.com/NVIDIA/spark-rapids/pull/10330)|Add tryAcquire to GpuSemaphore| -|[#10331](https://github.com/NVIDIA/spark-rapids/pull/10331)|Revert "Update to libcudf unsigned sum aggregation types change (#10267)"| |[#10258](https://github.com/NVIDIA/spark-rapids/pull/10258)|Init project version 24.04.0-SNAPSHOT| -## Release 24.02 - -### Features -||| -|:---|:---| -|[#9926](https://github.com/NVIDIA/spark-rapids/issues/9926)|[FEA] Add config option for the parquet reader input read limit.| -|[#10270](https://github.com/NVIDIA/spark-rapids/issues/10270)|[FEA] Add support for single quotes when reading JSON| -|[#10253](https://github.com/NVIDIA/spark-rapids/issues/10253)|[FEA] Enable mixed types as string in GpuJsonToStruct| -|[#9692](https://github.com/NVIDIA/spark-rapids/issues/9692)|[FEA] Remove Pascal support| -|[#8806](https://github.com/NVIDIA/spark-rapids/issues/8806)|[FEA] Support lazy quantifier and specified group index in regexp_extract function| -|[#10079](https://github.com/NVIDIA/spark-rapids/issues/10079)|[FEA] Add string parameter support for `unix_timestamp` for non-UTC time zones| -|[#9667](https://github.com/NVIDIA/spark-rapids/issues/9667)|[FEA][JSON] Add support for non default `dateFormat` in `from_json`| -|[#9173](https://github.com/NVIDIA/spark-rapids/issues/9173)|[FEA] Support format_number | -|[#10145](https://github.com/NVIDIA/spark-rapids/issues/10145)|[FEA] Support to_utc_timestamp| -|[#9927](https://github.com/NVIDIA/spark-rapids/issues/9927)|[FEA] Support to_date with non-UTC timezones without DST| -|[#10006](https://github.com/NVIDIA/spark-rapids/issues/10006)|[FEA] Support ```ParseToTimestamp``` for non-UTC time zones| -|[#9096](https://github.com/NVIDIA/spark-rapids/issues/9096)|[FEA] Add Spark 3.3.4 
support| -|[#9585](https://github.com/NVIDIA/spark-rapids/issues/9585)|[FEA] support ascii function| -|[#9260](https://github.com/NVIDIA/spark-rapids/issues/9260)|[FEA] Create Spark 3.4.2 shim and build env| -|[#10076](https://github.com/NVIDIA/spark-rapids/issues/10076)|[FEA] Add performance test framework for non-UTC time zone features.| -|[#9881](https://github.com/NVIDIA/spark-rapids/issues/9881)|[TASK] Remove `spark.rapids.sql.nonUTC.enabled` configuration option| -|[#9801](https://github.com/NVIDIA/spark-rapids/issues/9801)|[FEA] Support DateFormat on GPU with a non-UTC timezone| -|[#6834](https://github.com/NVIDIA/spark-rapids/issues/6834)|[FEA] Support GpuHour expression for timezones other than UTC| -|[#6842](https://github.com/NVIDIA/spark-rapids/issues/6842)|[FEA] Support TimeZone aware operations for value extraction| -|[#1860](https://github.com/NVIDIA/spark-rapids/issues/1860)|[FEA] Optimize row based window operations for BOUNDED ranges| -|[#9606](https://github.com/NVIDIA/spark-rapids/issues/9606)|[FEA] Support unix_timestamp with CST(China Time Zone) support| -|[#9815](https://github.com/NVIDIA/spark-rapids/issues/9815)|[FEA] Support ```unix_timestamp``` for non-DST timezones| -|[#8807](https://github.com/NVIDIA/spark-rapids/issues/8807)|[FEA] support ‘yyyyMMdd’ format in from_unixtime function| -|[#9605](https://github.com/NVIDIA/spark-rapids/issues/9605)|[FEA] Support from_unixtime with CST(China Time Zone) support| -|[#6836](https://github.com/NVIDIA/spark-rapids/issues/6836)|[FEA] Support FromUnixTime for non UTC timezones| -|[#9175](https://github.com/NVIDIA/spark-rapids/issues/9175)|[FEA] Support Databricks 13.3| -|[#6881](https://github.com/NVIDIA/spark-rapids/issues/6881)|[FEA] Support RAPIDS Spark plugin on ARM| -|[#9274](https://github.com/NVIDIA/spark-rapids/issues/9274)|[FEA] Regular deploy process to include arm artifacts| -|[#9844](https://github.com/NVIDIA/spark-rapids/issues/9844)|[FEA] Let Gpu arrow python runners support writing one batch one time for the single threaded model.| -|[#7309](https://github.com/NVIDIA/spark-rapids/issues/7309)|[FEA] Detect multiple versions of the RAPIDS jar on the classpath at the same time| - -### Performance -||| -|:---|:---| -|[#9442](https://github.com/NVIDIA/spark-rapids/issues/9442)|[FEA] For hash joins where the build side can change use the smaller table for the build side| -|[#10142](https://github.com/NVIDIA/spark-rapids/issues/10142)|[TASK] Benchmark existing timestamp functions that work in non-UTC time zone (non-DST)| - -### Bugs Fixed -||| -|:---|:---| -|[#10548](https://github.com/NVIDIA/spark-rapids/issues/10548)|[BUG] test_dpp_bypass / test_dpp_via_aggregate_subquery failures in CI Databricks 13.3| -|[#10530](https://github.com/NVIDIA/spark-rapids/issues/10530)|test_delta_merge_match_delete_only java.lang.OutOfMemoryError: GC overhead limit exceeded| -|[#10464](https://github.com/NVIDIA/spark-rapids/issues/10464)|[BUG] spark334 and spark342 shims missed in scala2.13 dist jar| -|[#10473](https://github.com/NVIDIA/spark-rapids/issues/10473)|[BUG] Leak when running RANK query| -|[#10432](https://github.com/NVIDIA/spark-rapids/issues/10432)|Plug-in Build Failing for Databricks 11.3 | -|[#9974](https://github.com/NVIDIA/spark-rapids/issues/9974)|[BUG] host memory Leak in MultiFileCoalescingPartitionReaderBase in UTC time zone| -|[#10359](https://github.com/NVIDIA/spark-rapids/issues/10359)|[BUG] Build failure on Databricks nightly run with `GpuMapInPandasExecMeta`| 
-|[#10327](https://github.com/NVIDIA/spark-rapids/issues/10327)|[BUG] Unit test FAILED against : SPARK-24957: average with decimal followed by aggregation returning wrong result | -|[#10324](https://github.com/NVIDIA/spark-rapids/issues/10324)|[BUG] hash_aggregate_test.py test FAILED: Type conversion is not allowed from Table {...}| -|[#10291](https://github.com/NVIDIA/spark-rapids/issues/10291)|[BUG] SIGSEGV in libucp.so| -|[#9212](https://github.com/NVIDIA/spark-rapids/issues/9212)|[BUG] `from_json` fails with cuDF error `Invalid list size computation error`| -|[#10264](https://github.com/NVIDIA/spark-rapids/issues/10264)|[BUG] hash aggregate test failures due to type conversion errors| -|[#10262](https://github.com/NVIDIA/spark-rapids/issues/10262)|[BUG] Test "SPARK-24957: average with decimal followed by aggregation returning wrong result" failed.| -|[#9353](https://github.com/NVIDIA/spark-rapids/issues/9353)|[BUG] [JSON] A mix of lists and structs within the same column is not supported| -|[#10099](https://github.com/NVIDIA/spark-rapids/issues/10099)|[BUG] orc_test.py::test_orc_scan_with_aggregate_pushdown fails with a standalone cluster on spark 3.3.0| -|[#10047](https://github.com/NVIDIA/spark-rapids/issues/10047)|[BUG] CudfException during conditional hash join while running nds query64| -|[#9779](https://github.com/NVIDIA/spark-rapids/issues/9779)|[BUG] 330cdh failed test_hash_reduction_sum_full_decimal on CI| -|[#10197](https://github.com/NVIDIA/spark-rapids/issues/10197)|[BUG] Disable GetJsonObject by default and update docs| -|[#10165](https://github.com/NVIDIA/spark-rapids/issues/10165)|[BUG] Databricks 13.3 executor side broadcast failure| -|[#10224](https://github.com/NVIDIA/spark-rapids/issues/10224)|[BUG] DBR builds fails when installing Maven| -|[#10222](https://github.com/NVIDIA/spark-rapids/issues/10222)|[BUG] to_utc_timestamp and from_utc_timestamp fallback when TZ is supported time zone| -|[#10195](https://github.com/NVIDIA/spark-rapids/issues/10195)|[BUG] test_window_aggs_for_negative_rows_partitioned failure in CI| -|[#10182](https://github.com/NVIDIA/spark-rapids/issues/10182)|[BUG] test_dpp_bypass / test_dpp_via_aggregate_subquery failures in CI (databricks)| -|[#10169](https://github.com/NVIDIA/spark-rapids/issues/10169)|[BUG] Host column vector leaks when running `test_cast_timestamp_to_date`| -|[#10050](https://github.com/NVIDIA/spark-rapids/issues/10050)|[BUG] test_cast_decimal_to_decimal[to:DecimalType(1,-1)-from:Decimal(5,-3)] fails with DATAGEN_SEED=1702439569| -|[#10088](https://github.com/NVIDIA/spark-rapids/issues/10088)|[BUG] GpuExplode single row split to fit cuDF limits| -|[#10174](https://github.com/NVIDIA/spark-rapids/issues/10174)|[BUG] json_test.py::test_from_json_struct_timestamp failed on: Part of the plan is not columnar | -|[#10186](https://github.com/NVIDIA/spark-rapids/issues/10186)|[BUG] test_to_date_with_window_functions failed in non-UTC nightly CI| -|[#10154](https://github.com/NVIDIA/spark-rapids/issues/10154)|[BUG] 'spark-test.sh' integration tests FAILED on 'ps: command not found" in Rocky Docker environment| -|[#10175](https://github.com/NVIDIA/spark-rapids/issues/10175)|[BUG] string_test.py::test_format_number_float_special FAILED : AssertionError 'NaN' == | -|[#10166](https://github.com/NVIDIA/spark-rapids/issues/10166)|Detect Undeclared Shim in POM.xml| -|[#10170](https://github.com/NVIDIA/spark-rapids/issues/10170)|[BUG] `test_cast_timestamp_to_date` fails with `TZ=Asia/Hebron`| 
-|[#10149](https://github.com/NVIDIA/spark-rapids/issues/10149)|[BUG] GPU illegal access detected during delta_byte_array.parquet read| -|[#9905](https://github.com/NVIDIA/spark-rapids/issues/9905)|[BUG] GpuJsonScan incorrect behavior when parsing dates| -|[#10163](https://github.com/NVIDIA/spark-rapids/issues/10163)|Spark 3.3.4 Shim Build Failure| -|[#10105](https://github.com/NVIDIA/spark-rapids/issues/10105)|[BUG] scala:compile is not thread safe unless compiler bridge already exists | -|[#10026](https://github.com/NVIDIA/spark-rapids/issues/10026)|[BUG] test_hash_agg_with_nan_keys failed with a DATAGEN_SEED=1702335559| -|[#10075](https://github.com/NVIDIA/spark-rapids/issues/10075)|[BUG] `non-pinned blocking alloc with spill` unit test failed in HostAllocSuite| -|[#10134](https://github.com/NVIDIA/spark-rapids/issues/10134)|[BUG] test_window_aggs_for_batched_finite_row_windows_partitioned failed on Scala 2.13 with DATAGEN_SEED=1704033145| -|[#10118](https://github.com/NVIDIA/spark-rapids/issues/10118)|[BUG] non-UTC Nightly CI failed| -|[#10136](https://github.com/NVIDIA/spark-rapids/issues/10136)|[BUG] The canonicalized version of `GpuFileSourceScanExec`s that suppose to be semantic-equal can be different | -|[#10110](https://github.com/NVIDIA/spark-rapids/issues/10110)|[BUG] disable collect_list and collect_set for window operations by default.| -|[#10129](https://github.com/NVIDIA/spark-rapids/issues/10129)|[BUG] Unit test suite fails with `Null data pointer` in GpuTimeZoneDB| -|[#10089](https://github.com/NVIDIA/spark-rapids/issues/10089)|[BUG] DATAGEN_SEED= environment does not override the marker datagen_overrides| -|[#10108](https://github.com/NVIDIA/spark-rapids/issues/10108)|[BUG] @datagen_overrides seed is sticky when it shouldn't be| -|[#10064](https://github.com/NVIDIA/spark-rapids/issues/10064)|[BUG] test_unsupported_fallback_regexp_replace failed with DATAGEN_SEED=1702662063| -|[#10117](https://github.com/NVIDIA/spark-rapids/issues/10117)|[BUG] test_from_utc_timestamp failed on Cloudera Env when TZ is Iran| -|[#9914](https://github.com/NVIDIA/spark-rapids/issues/9914)|[BUG] Report GPU OOM on recent passed CI premerges.| -|[#10094](https://github.com/NVIDIA/spark-rapids/issues/10094)|[BUG] spark351 PR check failure MockTaskContext method isFailed in class TaskContext of type ()Boolean is not defined| -|[#10017](https://github.com/NVIDIA/spark-rapids/issues/10017)|[BUG] test_casting_from_double_to_timestamp failed for DATAGEN_SEED=1702329497| -|[#9992](https://github.com/NVIDIA/spark-rapids/issues/9992)|[BUG] conditionals_test.py::test_conditional_with_side_effects_cast[String] failed with DATAGEN_SEED=1701976979| -|[#9743](https://github.com/NVIDIA/spark-rapids/issues/9743)|[BUG][AUDIT] SPARK-45652 - SPJ: Handle empty input partitions after dynamic filtering| -|[#9859](https://github.com/NVIDIA/spark-rapids/issues/9859)|[AUDIT] [SPARK-45786] Inaccurate Decimal multiplication and division results| -|[#9555](https://github.com/NVIDIA/spark-rapids/issues/9555)|[BUG] Scala 2.13 build with JDK 11 or 17 fails OpcodeSuite tests| -|[#10073](https://github.com/NVIDIA/spark-rapids/issues/10073)|[BUG] test_csv_prefer_date_with_infer_schema failed with DATAGEN_SEED=1702847907| -|[#10004](https://github.com/NVIDIA/spark-rapids/issues/10004)|[BUG] If a host memory buffer is spilled, it cannot be unspilled| -|[#10063](https://github.com/NVIDIA/spark-rapids/issues/10063)|[BUG] CI build failure with 341db: method getKillReason has weaker access privileges; it should be public| 
-|[#10055](https://github.com/NVIDIA/spark-rapids/issues/10055)|[BUG] array_test.py::test_array_transform_non_deterministic failed with non-UTC time zone| -|[#10056](https://github.com/NVIDIA/spark-rapids/issues/10056)|[BUG] Unit tests ToPrettyStringSuite FAILED on spark-3.5.0| -|[#10048](https://github.com/NVIDIA/spark-rapids/issues/10048)|[BUG] Fix ```out of range``` error from ```pySpark``` in ```test_timestamp_millis``` and other two integration test cases| -|[#4204](https://github.com/NVIDIA/spark-rapids/issues/4204)|casting double to string does not match Spark| -|[#9938](https://github.com/NVIDIA/spark-rapids/issues/9938)|Better to do some refactor for the Python UDF code| -|[#10018](https://github.com/NVIDIA/spark-rapids/issues/10018)|[BUG] `GpuToUnixTimestampImproved` off by 1 on GPU when handling timestamp before epoch| -|[#10012](https://github.com/NVIDIA/spark-rapids/issues/10012)|[BUG] test_str_to_map_expr_random_delimiters with DATAGEN_SEED=1702166057 hangs| -|[#10029](https://github.com/NVIDIA/spark-rapids/issues/10029)|[BUG] doc links fail with 404 for shims.md| -|[#9472](https://github.com/NVIDIA/spark-rapids/issues/9472)|[BUG] Non-Deterministic expressions in an array_transform can cause errors| -|[#9884](https://github.com/NVIDIA/spark-rapids/issues/9884)|[BUG] delta_lake_delete_test.py failed assertion [DATAGEN_SEED=1701225104, IGNORE_ORDER...| -|[#9977](https://github.com/NVIDIA/spark-rapids/issues/9977)|[BUG] test_cast_date_integral fails on databricks 3.4.1| -|[#9936](https://github.com/NVIDIA/spark-rapids/issues/9936)|[BUG] Nightly CI of non-UTC time zone reports 'year 0 is out of range' error| -|[#9941](https://github.com/NVIDIA/spark-rapids/issues/9941)|[BUG] A potential data corruption in Pandas UDFs| -|[#9897](https://github.com/NVIDIA/spark-rapids/issues/9897)|[BUG] Error message for multiple jars on classpath is wrong| -|[#9916](https://github.com/NVIDIA/spark-rapids/issues/9916)|[BUG] ```test_cast_string_ts_valid_format``` failed at ```seed = 1701362564```| -|[#9559](https://github.com/NVIDIA/spark-rapids/issues/9559)|[BUG] precommit regularly fails with error trying to download a dependency| -|[#9708](https://github.com/NVIDIA/spark-rapids/issues/9708)|[BUG] test_cast_string_ts_valid_format fails with DATAGEN_SEED=1699978422| - -### PRs -||| -|:---|:---| -|[#10555](https://github.com/NVIDIA/spark-rapids/pull/10555)|Update change log [skip ci]| -|[#10551](https://github.com/NVIDIA/spark-rapids/pull/10551)|Try to make degenerative joins here impossible for these tests| -|[#10546](https://github.com/NVIDIA/spark-rapids/pull/10546)|Update changelog [skip ci]| -|[#10541](https://github.com/NVIDIA/spark-rapids/pull/10541)|Fix Delta log cache size settings during integration tests| -|[#10525](https://github.com/NVIDIA/spark-rapids/pull/10525)|Update changelog for v24.02.0 release [skip ci]| -|[#10465](https://github.com/NVIDIA/spark-rapids/pull/10465)|Add missed shims for scala2.13| -|[#10511](https://github.com/NVIDIA/spark-rapids/pull/10511)|Update rapids jni and private dependency version to 24.02.1| -|[#10513](https://github.com/NVIDIA/spark-rapids/pull/10513)|Fix scalar leak in SumBinaryFixer (#10510)| -|[#10475](https://github.com/NVIDIA/spark-rapids/pull/10475)|Fix scalar leak in RankFixer| -|[#10461](https://github.com/NVIDIA/spark-rapids/pull/10461)|Preserve tags on FileSourceScanExec| -|[#10459](https://github.com/NVIDIA/spark-rapids/pull/10459)|[DOC] Fix table rendering issue in github.io download UI page on branch-24.02 [skip ci] | 
-|[#10443](https://github.com/NVIDIA/spark-rapids/pull/10443)|Update change log for v24.02.0 release [skip ci]| -|[#10439](https://github.com/NVIDIA/spark-rapids/pull/10439)|Reverts NVIDIA/spark-rapids#10232 and fixes the plugin build on Databricks 11.3| -|[#10380](https://github.com/NVIDIA/spark-rapids/pull/10380)|Init changelog 24.02 [skip ci]| -|[#10367](https://github.com/NVIDIA/spark-rapids/pull/10367)|Update rapids JNI and private version to release 24.02.0| -|[#10414](https://github.com/NVIDIA/spark-rapids/pull/10414)|[DOC] Fix 24.02.0 documentation errors [skip ci]| -|[#10403](https://github.com/NVIDIA/spark-rapids/pull/10403)|Cherry-pick: Fix a memory leak in json tuple (#10360)| -|[#10387](https://github.com/NVIDIA/spark-rapids/pull/10387)|[DOC] Update docs for 24.02.0 release [skip ci]| -|[#10399](https://github.com/NVIDIA/spark-rapids/pull/10399)|Update NOTICE-binary| -|[#10389](https://github.com/NVIDIA/spark-rapids/pull/10389)|Change version and branch to 24.02 in docs [skip ci]| -|[#10384](https://github.com/NVIDIA/spark-rapids/pull/10384)|[DOC] Update docs for 23.12.2 release [skip ci] | -|[#10309](https://github.com/NVIDIA/spark-rapids/pull/10309)|[DOC] add custom 404 page and fix some document issue [skip ci]| -|[#10352](https://github.com/NVIDIA/spark-rapids/pull/10352)|xfail mixed type test| -|[#10355](https://github.com/NVIDIA/spark-rapids/pull/10355)|Revert "Support barrier mode for mapInPandas/mapInArrow (#10343)"| -|[#10353](https://github.com/NVIDIA/spark-rapids/pull/10353)|Use fixed seed for test_from_json_struct_decimal| -|[#10343](https://github.com/NVIDIA/spark-rapids/pull/10343)|Support barrier mode for mapInPandas/mapInArrow| -|[#10345](https://github.com/NVIDIA/spark-rapids/pull/10345)|Fix auto merge conflict 10339 [skip ci]| -|[#9991](https://github.com/NVIDIA/spark-rapids/pull/9991)|Start to use explicit memory limits in the parquet chunked reader| -|[#10328](https://github.com/NVIDIA/spark-rapids/pull/10328)|Fix typo in spark-tests.sh [skip ci]| -|[#10279](https://github.com/NVIDIA/spark-rapids/pull/10279)|Run '--packages' only with default cuda11 jar| -|[#10273](https://github.com/NVIDIA/spark-rapids/pull/10273)|Support reading JSON data with single quotes around attribute names and values| -|[#10306](https://github.com/NVIDIA/spark-rapids/pull/10306)|Fix performance regression in from_json| -|[#10272](https://github.com/NVIDIA/spark-rapids/pull/10272)|Add FullOuter support to GpuShuffledSymmetricHashJoinExec| -|[#10260](https://github.com/NVIDIA/spark-rapids/pull/10260)|Add perf test for time zone operators| -|[#10275](https://github.com/NVIDIA/spark-rapids/pull/10275)|Add tests for window Python udf with array input| -|[#10278](https://github.com/NVIDIA/spark-rapids/pull/10278)|Clean up $M2_CACHE to avoid side-effect of previous dependency:get [skip ci]| -|[#10268](https://github.com/NVIDIA/spark-rapids/pull/10268)|Add config to enable mixed types as string in GpuJsonToStruct & GpuJsonScan| -|[#10297](https://github.com/NVIDIA/spark-rapids/pull/10297)|Revert "UCX 1.16.0 upgrade (#10190)"| -|[#10289](https://github.com/NVIDIA/spark-rapids/pull/10289)|Add gerashegalov to CODEOWNERS [skip ci]| -|[#10290](https://github.com/NVIDIA/spark-rapids/pull/10290)|Fix merge conflict with 23.12 [skip ci]| -|[#10190](https://github.com/NVIDIA/spark-rapids/pull/10190)|UCX 1.16.0 upgrade| -|[#10211](https://github.com/NVIDIA/spark-rapids/pull/10211)|Use parse_url kernel for QUERY literal and column key| 
-|[#10267](https://github.com/NVIDIA/spark-rapids/pull/10267)|Update to libcudf unsigned sum aggregation types change| -|[#10208](https://github.com/NVIDIA/spark-rapids/pull/10208)|Added Support for Lazy Quantifier| -|[#9993](https://github.com/NVIDIA/spark-rapids/pull/9993)|Enable mixed types as string in GpuJsonScan| -|[#10246](https://github.com/NVIDIA/spark-rapids/pull/10246)|Refactor full join iterator to allow access to build tracker| -|[#10257](https://github.com/NVIDIA/spark-rapids/pull/10257)|Enable auto-merge from branch-24.02 to branch-24.04 [skip CI]| -|[#10178](https://github.com/NVIDIA/spark-rapids/pull/10178)|Mark hash reduction decimal overflow test as a permanent seed override| -|[#10244](https://github.com/NVIDIA/spark-rapids/pull/10244)|Use POSIX mode in assembly plugin to avoid issues with large UID/GID| -|[#10238](https://github.com/NVIDIA/spark-rapids/pull/10238)|Smoke test with '--package' to fetch the plugin jar| -|[#10201](https://github.com/NVIDIA/spark-rapids/pull/10201)|Deploy release candidates to local maven repo for dependency check[skip ci]| -|[#10240](https://github.com/NVIDIA/spark-rapids/pull/10240)|Improved inner joins with large build side| -|[#10220](https://github.com/NVIDIA/spark-rapids/pull/10220)|Disable GetJsonObject by default and add tests for as many issues with it as possible| -|[#10230](https://github.com/NVIDIA/spark-rapids/pull/10230)|Fix Databricks 13.3 BroadcastHashJoin using executor side broadcast fed by ColumnarToRow [Databricks]| -|[#10232](https://github.com/NVIDIA/spark-rapids/pull/10232)|Fixed 330db Shims to Adopt the PythonRunner Changes| -|[#10225](https://github.com/NVIDIA/spark-rapids/pull/10225)|Download Maven from apache.org archives [skip ci]| -|[#10210](https://github.com/NVIDIA/spark-rapids/pull/10210)|Add string parameter support for unix_timestamp for non-UTC time zones| -|[#10223](https://github.com/NVIDIA/spark-rapids/pull/10223)|Fix to_utc_timestamp and from_utc_timestamp fallback when TZ is supported time zone| -|[#10205](https://github.com/NVIDIA/spark-rapids/pull/10205)|Deterministic ordering in window tests| -|[#10204](https://github.com/NVIDIA/spark-rapids/pull/10204)|Further prevent degenerative joins in dpp_test| -|[#10156](https://github.com/NVIDIA/spark-rapids/pull/10156)|Update string to float compatibility doc[skip ci]| -|[#10193](https://github.com/NVIDIA/spark-rapids/pull/10193)|Fix explode with carry-along columns on GpuExplode single row retry handling| -|[#10191](https://github.com/NVIDIA/spark-rapids/pull/10191)|Updating the config documentation for filecache configs [skip ci]| -|[#10131](https://github.com/NVIDIA/spark-rapids/pull/10131)|With a single row GpuExplode tries to split the generator array| -|[#10179](https://github.com/NVIDIA/spark-rapids/pull/10179)|Fix build regression against Spark 3.2.x| -|[#10189](https://github.com/NVIDIA/spark-rapids/pull/10189)|test needs marks for non-UTC and for non_supported timezones| -|[#10176](https://github.com/NVIDIA/spark-rapids/pull/10176)|Fix format_number NaN symbol in high jdk version| -|[#10074](https://github.com/NVIDIA/spark-rapids/pull/10074)|Update the legacy mode check: only take effect when reading date/timestamp column| -|[#10167](https://github.com/NVIDIA/spark-rapids/pull/10167)|Defined Shims Should Be Declared In POM | -|[#10168](https://github.com/NVIDIA/spark-rapids/pull/10168)|Prevent a degenerative join in test_dpp_reuse_broadcast_exchange| -|[#10171](https://github.com/NVIDIA/spark-rapids/pull/10171)|Fix `test_cast_timestamp_to_date` 
when running in a DST time zone| -|[#9975](https://github.com/NVIDIA/spark-rapids/pull/9975)|Improve dateFormat support in GpuJsonScan and make tests consistent with GpuStructsToJson| -|[#9790](https://github.com/NVIDIA/spark-rapids/pull/9790)|Support float case of format_number with format_float kernel| -|[#10144](https://github.com/NVIDIA/spark-rapids/pull/10144)|Support to_utc_timestamp| -|[#10162](https://github.com/NVIDIA/spark-rapids/pull/10162)|Fix Spark 334 Build| -|[#10146](https://github.com/NVIDIA/spark-rapids/pull/10146)|Refactor the window code so it is not mostly kept in a few very large files| -|[#10155](https://github.com/NVIDIA/spark-rapids/pull/10155)|Install procps tools for rocky docker images [skip ci]| -|[#10153](https://github.com/NVIDIA/spark-rapids/pull/10153)|Disable multi-threaded Maven | -|[#10100](https://github.com/NVIDIA/spark-rapids/pull/10100)|Enable to_date (via gettimestamp and casting timestamp to date) for non-UTC time zones| -|[#10140](https://github.com/NVIDIA/spark-rapids/pull/10140)|Removed Unnecessary Whitespaces From Spark 3.3.4 Shim [skip ci]| -|[#10148](https://github.com/NVIDIA/spark-rapids/pull/10148)|fix test_hash_agg_with_nan_keys floating point sum failure| -|[#10150](https://github.com/NVIDIA/spark-rapids/pull/10150)|Increase timeouts in HostAllocSuite to avoid timeout failures on slow machines| -|[#10143](https://github.com/NVIDIA/spark-rapids/pull/10143)|Fix `test_window_aggs_for_batched_finite_row_windows_partitioned` fail| -|[#9887](https://github.com/NVIDIA/spark-rapids/pull/9887)|Reduce time-consuming of pre-merge| -|[#10130](https://github.com/NVIDIA/spark-rapids/pull/10130)|Change unit tests that force ooms to specify the oom type (gpu|cpu)| -|[#10138](https://github.com/NVIDIA/spark-rapids/pull/10138)|Update copyright dates in NOTICE files [skip ci]| -|[#10139](https://github.com/NVIDIA/spark-rapids/pull/10139)|Add Delta Lake 2.3.0 to list of versions to test for Spark 3.3.x| -|[#10135](https://github.com/NVIDIA/spark-rapids/pull/10135)|Fix CI: can't find script when there is pushd in script [skip ci]| -|[#10137](https://github.com/NVIDIA/spark-rapids/pull/10137)|Fix the canonicalizing for GPU file scan| -|[#10132](https://github.com/NVIDIA/spark-rapids/pull/10132)|Disable collect_list and collect_set for window by default| -|[#10084](https://github.com/NVIDIA/spark-rapids/pull/10084)|Refactor GpuJsonToStruct to reduce code duplication and manage resources more efficiently| -|[#10087](https://github.com/NVIDIA/spark-rapids/pull/10087)|Additional unit tests for GeneratedInternalRowToCudfRowIterator| -|[#10082](https://github.com/NVIDIA/spark-rapids/pull/10082)|Add Spark 3.3.4 Shim| -|[#10054](https://github.com/NVIDIA/spark-rapids/pull/10054)|Support Ascii function for ascii and latin-1| -|[#10127](https://github.com/NVIDIA/spark-rapids/pull/10127)|Fix merge conflict with branch-23.12| -|[#10097](https://github.com/NVIDIA/spark-rapids/pull/10097)|[DOC] Update docs for 23.12.1 release [skip ci]| -|[#10109](https://github.com/NVIDIA/spark-rapids/pull/10109)|Fixes a bug where datagen seed overrides were sticky and adds datagen_seed_override_disabled| -|[#10093](https://github.com/NVIDIA/spark-rapids/pull/10093)|Fix test_unsupported_fallback_regexp_replace| -|[#10119](https://github.com/NVIDIA/spark-rapids/pull/10119)|Fix from_utc_timestamp case failure on Cloudera when TZ is Iran| -|[#10106](https://github.com/NVIDIA/spark-rapids/pull/10106)|Add `isFailed()` to MockTaskContext and Remove MockTaskContextBase.scala| 
-|[#10112](https://github.com/NVIDIA/spark-rapids/pull/10112)|Remove datagen seed override for test_conditional_with_side_effects_cast| -|[#10104](https://github.com/NVIDIA/spark-rapids/pull/10104)|[DOC] Add in docs about memory debugging [skip ci]| -|[#9925](https://github.com/NVIDIA/spark-rapids/pull/9925)|Use threads, cache Scala compiler in GH mvn workflow| -|[#9967](https://github.com/NVIDIA/spark-rapids/pull/9967)|Added Spark-3.4.2 Shims| -|[#10061](https://github.com/NVIDIA/spark-rapids/pull/10061)|Use parse_url kernel for QUERY parsing| -|[#10101](https://github.com/NVIDIA/spark-rapids/pull/10101)|[DOC] Add column order error docs [skip ci]| -|[#10078](https://github.com/NVIDIA/spark-rapids/pull/10078)|Add perf test for non-UTC operators| -|[#10096](https://github.com/NVIDIA/spark-rapids/pull/10096)|Shim MockTaskContext to fix Spark 3.5.1 build| -|[#10092](https://github.com/NVIDIA/spark-rapids/pull/10092)|Implement Math.round using floor on GPU| -|[#10085](https://github.com/NVIDIA/spark-rapids/pull/10085)|Update tests that originally restricted the Spark timestamp range| -|[#10090](https://github.com/NVIDIA/spark-rapids/pull/10090)|Replace GPU-unsupported `\z` with an alternative RLIKE expression| -|[#10095](https://github.com/NVIDIA/spark-rapids/pull/10095)|Temporarily fix date format failed cases for non-UTC time zone.| -|[#9999](https://github.com/NVIDIA/spark-rapids/pull/9999)|Add some odd time zones for timezone transition tests| -|[#9962](https://github.com/NVIDIA/spark-rapids/pull/9962)|Add 3.5.1-SNAPSHOT Shim| -|[#10071](https://github.com/NVIDIA/spark-rapids/pull/10071)|Cleanup usage of non-utc configuration here| -|[#10057](https://github.com/NVIDIA/spark-rapids/pull/10057)|Add support for StringConcatFactory.makeConcatWithConstants (#9555)| -|[#9996](https://github.com/NVIDIA/spark-rapids/pull/9996)|Test full timestamp output range in PySpark| -|[#10081](https://github.com/NVIDIA/spark-rapids/pull/10081)|Add a fallback Cloudera Maven repo URL [skip ci]| -|[#10065](https://github.com/NVIDIA/spark-rapids/pull/10065)|Improve host memory spill interfaces| -|[#10069](https://github.com/NVIDIA/spark-rapids/pull/10069)|Revert "Support split broadcast join condition into ast and non-ast […| -|[#10070](https://github.com/NVIDIA/spark-rapids/pull/10070)|Fix 332db build failure| -|[#10060](https://github.com/NVIDIA/spark-rapids/pull/10060)|Fix failed cases for non-utc time zone| -|[#10038](https://github.com/NVIDIA/spark-rapids/pull/10038)|Remove spark.rapids.sql.nonUTC.enabled configuration option| -|[#10059](https://github.com/NVIDIA/spark-rapids/pull/10059)|Fixed Failing ToPrettyStringSuite Test for 3.5.0| -|[#10013](https://github.com/NVIDIA/spark-rapids/pull/10013)|Extended configuration of OOM injection mode| -|[#10052](https://github.com/NVIDIA/spark-rapids/pull/10052)|Set seed=0 for some integration test cases| -|[#10053](https://github.com/NVIDIA/spark-rapids/pull/10053)|Remove invalid user from CODEOWNER file [skip ci]| -|[#10049](https://github.com/NVIDIA/spark-rapids/pull/10049)|Fix out of range error from pySpark in test_timestamp_millis and other two integration test cases| -|[#9721](https://github.com/NVIDIA/spark-rapids/pull/9721)|Support date_format via Gpu for non-UTC time zone| -|[#9470](https://github.com/NVIDIA/spark-rapids/pull/9470)|Use float to string kernel| -|[#9845](https://github.com/NVIDIA/spark-rapids/pull/9845)|Use parse_url kernel for HOST parsing| -|[#10024](https://github.com/NVIDIA/spark-rapids/pull/10024)|Support hour minute second for non-UTC 
time zone| -|[#9973](https://github.com/NVIDIA/spark-rapids/pull/9973)|Batching support for row-based bounded window functions | -|[#10042](https://github.com/NVIDIA/spark-rapids/pull/10042)|Update tests to not have hard coded fallback when not needed| -|[#9816](https://github.com/NVIDIA/spark-rapids/pull/9816)|Support unix_timestamp and to_unix_timestamp with non-UTC timezones (non-DST)| -|[#9902](https://github.com/NVIDIA/spark-rapids/pull/9902)|Some refactor for the Python UDF code| -|[#10023](https://github.com/NVIDIA/spark-rapids/pull/10023)|GPU supports `yyyyMMdd` format by post process for the `from_unixtime` function| -|[#10033](https://github.com/NVIDIA/spark-rapids/pull/10033)|Remove GpuToTimestampImproved and spark.rapids.sql.improvedTimeOps.enabled| -|[#10016](https://github.com/NVIDIA/spark-rapids/pull/10016)|Fix infinite loop in test_str_to_map_expr_random_delimiters| -|[#9481](https://github.com/NVIDIA/spark-rapids/pull/9481)|Use parse_url kernel for PROTOCOL parsing| -|[#10030](https://github.com/NVIDIA/spark-rapids/pull/10030)|Update links in shims.md| -|[#10015](https://github.com/NVIDIA/spark-rapids/pull/10015)|Fix array_transform to not recompute the argument| -|[#10011](https://github.com/NVIDIA/spark-rapids/pull/10011)|Add cpu oom retry split handling to InternalRowToColumnarBatchIterator| -|[#10019](https://github.com/NVIDIA/spark-rapids/pull/10019)|Fix auto merge conflict 10010 [skip ci]| -|[#9760](https://github.com/NVIDIA/spark-rapids/pull/9760)|Support split broadcast join condition into ast and non-ast| -|[#9827](https://github.com/NVIDIA/spark-rapids/pull/9827)|Enable ORC timestamp and decimal predicate push down tests| -|[#10002](https://github.com/NVIDIA/spark-rapids/pull/10002)|Use Spark 3.3.3 instead of 3.3.2 for Scala 2.13 premerge builds| -|[#10000](https://github.com/NVIDIA/spark-rapids/pull/10000)|Optimize from_unixtime| -|[#10003](https://github.com/NVIDIA/spark-rapids/pull/10003)|Fix merge conflict with branch-23.12| -|[#9984](https://github.com/NVIDIA/spark-rapids/pull/9984)|Fix 340+(including DB341+) does not support casting date to integral/float| -|[#9972](https://github.com/NVIDIA/spark-rapids/pull/9972)|Fix year 0 is out of range in test_from_json_struct_timestamp | -|[#9814](https://github.com/NVIDIA/spark-rapids/pull/9814)|Support from_unixtime via Gpu for non-UTC time zone| -|[#9929](https://github.com/NVIDIA/spark-rapids/pull/9929)|Add host memory retries for GeneratedInternalRowToCudfRowIterator| -|[#9957](https://github.com/NVIDIA/spark-rapids/pull/9957)|Update cases for cast between integral and (date/time)| -|[#9959](https://github.com/NVIDIA/spark-rapids/pull/9959)|Append new authorized user to blossom-ci whitelist [skip ci]| -|[#9942](https://github.com/NVIDIA/spark-rapids/pull/9942)|Fix a potential data corruption for Pandas UDF| -|[#9922](https://github.com/NVIDIA/spark-rapids/pull/9922)|Fix `allowMultipleJars` recommend setting message| -|[#9947](https://github.com/NVIDIA/spark-rapids/pull/9947)|Fix merge conflict with branch-23.12| -|[#9908](https://github.com/NVIDIA/spark-rapids/pull/9908)|Register default allocator for host memory| -|[#9944](https://github.com/NVIDIA/spark-rapids/pull/9944)|Fix Java OOM caused by incorrect state of shouldCapture when exception occurred| -|[#9937](https://github.com/NVIDIA/spark-rapids/pull/9937)|Refactor to use CLASSIFIER instead of CUDA_CLASSIFIER [skip ci]| -|[#9904](https://github.com/NVIDIA/spark-rapids/pull/9904)|Params for build and test CI scripts on Databricks| 
-|[#9719](https://github.com/NVIDIA/spark-rapids/pull/9719)|Support fine grained timezone checker instead of type based| -|[#9918](https://github.com/NVIDIA/spark-rapids/pull/9918)|Prevent generation of 'year 0 is out of range' strings in IT| -|[#9852](https://github.com/NVIDIA/spark-rapids/pull/9852)|Avoid generating duplicate nan keys with MapGen(FloatGen)| -|[#9674](https://github.com/NVIDIA/spark-rapids/pull/9674)|Add cache action to speed up mvn workflow [skip ci]| -|[#9900](https://github.com/NVIDIA/spark-rapids/pull/9900)|Revert "Remove Databricks 13.3 from release 23.12 (#9890)"| -|[#9889](https://github.com/NVIDIA/spark-rapids/pull/9889)|Fix test_cast_string_ts_valid_format test| -|[#9888](https://github.com/NVIDIA/spark-rapids/pull/9888)|Update nightly build and deploy script for arm artifacts [skip ci]| -|[#9833](https://github.com/NVIDIA/spark-rapids/pull/9833)|Fix a hang for Pandas UDFs on DB 13.3| -|[#9656](https://github.com/NVIDIA/spark-rapids/pull/9656)|Update for new retry state machine JNI APIs| -|[#9654](https://github.com/NVIDIA/spark-rapids/pull/9654)|Detect multiple jars on the classpath when init plugin| -|[#9857](https://github.com/NVIDIA/spark-rapids/pull/9857)|Skip redundant steps in nightly build [skip ci]| -|[#9812](https://github.com/NVIDIA/spark-rapids/pull/9812)|Update JNI and private dep version to 24.02.0-SNAPSHOT| -|[#9716](https://github.com/NVIDIA/spark-rapids/pull/9716)|Initiate project version 24.02.0-SNAPSHOT| - ## Older Releases Changelog of older releases can be found at [docs/archives](/docs/archives) diff --git a/build/buildall b/build/buildall index e8c0610deb7..b3c473be141 100755 --- a/build/buildall +++ b/build/buildall @@ -265,7 +265,7 @@ function build_single_shim() { -Dmaven.scaladoc.skip \ -Dmaven.scalastyle.skip="$SKIP_CHECKS" \ -pl tools -am > "$LOG_FILE" 2>&1 || { - [[ "$LOG_FILE" != "/dev/tty" ]] && echo "$LOG_FILE:" && tail -20 "$LOG_FILE" || true + [[ "$LOG_FILE" != "/dev/tty" ]] && echo "$LOG_FILE:" && tail -500 "$LOG_FILE" || true exit 255 } } diff --git a/datagen/src/main/scala/org/apache/spark/sql/tests/datagen/bigDataGen.scala b/datagen/src/main/scala/org/apache/spark/sql/tests/datagen/bigDataGen.scala index 91335afe4e6..14e0d4e0970 100644 --- a/datagen/src/main/scala/org/apache/spark/sql/tests/datagen/bigDataGen.scala +++ b/datagen/src/main/scala/org/apache/spark/sql/tests/datagen/bigDataGen.scala @@ -16,21 +16,22 @@ package org.apache.spark.sql.tests.datagen +import com.fasterxml.jackson.core.{JsonFactoryBuilder, JsonParser, JsonToken} +import com.fasterxml.jackson.core.json.JsonReadFeature import java.math.{BigDecimal => JavaBigDecimal} import java.sql.{Date, Timestamp} import java.time.{Duration, Instant, LocalDate, LocalDateTime} import java.util - import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.math.BigDecimal.RoundingMode import scala.util.Random -import org.apache.spark.sql.{Column, DataFrame, SparkSession} +import org.apache.spark.sql.{Column, DataFrame, Row, SparkSession} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Expression, XXH64} import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData, DateTimeUtils} -import org.apache.spark.sql.functions.col +import org.apache.spark.sql.functions.{approx_count_distinct, avg, coalesce, col, count, lit, stddev, struct, transform, udf, when} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.random.XORShiftRandom 
@@ -79,22 +80,28 @@ class RowLocation(val rowNum: Long, val subRows: Array[Int] = null) { * hash. This makes the generated data correlated for all column/child columns. * @param tableNum a unique ID for the table this is a part of. * @param columnNum the location of the column in the data being generated + * @param substringNum the location of the substring column * @param correlatedKeyGroup the correlated key group this column is a part of, if any. */ -case class ColumnLocation(tableNum: Int, columnNum: Int, correlatedKeyGroup: Option[Long] = None) { - def forNextColumn(): ColumnLocation = ColumnLocation(tableNum, columnNum + 1) +case class ColumnLocation(tableNum: Int, + columnNum: Int, + substringNum: Int, + correlatedKeyGroup: Option[Long] = None) { + def forNextColumn(): ColumnLocation = ColumnLocation(tableNum, columnNum + 1, 0) + def forNextSubstring: ColumnLocation = ColumnLocation(tableNum, columnNum, substringNum + 1) /** * Create a new ColumnLocation that is specifically for a given key group */ def forCorrelatedKeyGroup(keyGroup: Long): ColumnLocation = - ColumnLocation(tableNum, columnNum, Some(keyGroup)) + ColumnLocation(tableNum, columnNum, substringNum, Some(keyGroup)) /** * Hash the location into a single long value. */ - lazy val hashLoc: Long = XXH64.hashLong(tableNum, correlatedKeyGroup.getOrElse(columnNum)) + lazy val hashLoc: Long = XXH64.hashLong(tableNum, + correlatedKeyGroup.getOrElse(XXH64.hashLong(columnNum, substringNum))) } /** @@ -115,6 +122,9 @@ case class ColumnConf(columnLoc: ColumnLocation, def forNextColumn(nullable: Boolean): ColumnConf = ColumnConf(columnLoc.forNextColumn(), nullable, numTableRows) + def forNextSubstring: ColumnConf = + ColumnConf(columnLoc.forNextSubstring, nullable = true, numTableRows) + /** * Create a new configuration based on this, but for a given correlated key group. */ @@ -303,6 +313,23 @@ case class VarLengthGeneratorFunction(minLength: Int, maxLength: Int) extends } } +case class StdDevLengthGen(mean: Double, + stdDev: Double, + mapping: LocationToSeedMapping = null) extends + LengthGeneratorFunction { + override def withLocationToSeedMapping(mapping: LocationToSeedMapping): LengthGeneratorFunction = + StdDevLengthGen(mean, stdDev, mapping) + + override def apply(rowLoc: RowLocation): Int = { + val r = DataGen.getRandomFor(rowLoc, mapping) + val g = r.nextGaussian() // g has a mean of 0 and a stddev of 1.0 + val adjusted = mean + (g * stdDev) + // If the range of seed is too small compared to the stddev and mean we will + // end up with an invalid distribution, but they asked for it. + math.max(0, math.round(adjusted).toInt) + } +} + /** * Generate nulls with a given probability. * @param prob 0.0 to 1.0 for how often nulls should appear in the output. @@ -562,11 +589,8 @@ case class DataGenExpr(child: Expression, } } -/** - * Base class for generating a column/sub-column. This holds configuration for the column, - * and handles what is needed to convert it into GeneratorFunction - */ -abstract class DataGen(var conf: ColumnConf, +abstract class CommonDataGen( + var conf: ColumnConf, defaultValueRange: Option[(Any, Any)], var seedMapping: LocationToSeedMapping = FlatDistribution(), var nullMapping: LocationToSeedMapping = FlatDistribution(), @@ -576,26 +600,25 @@ abstract class DataGen(var conf: ColumnConf, protected var valueRange: Option[(Any, Any)] = defaultValueRange /** - * Set a value range for this data gen. 
+ * Set a value range */ - def setValueRange(min: Any, max: Any): DataGen = { + def setValueRange(min: Any, max: Any): CommonDataGen = { valueRange = Some((min, max)) this } /** - * Set a custom GeneratorFunction to use for this column. + * Set a custom GeneratorFunction */ - def setValueGen(f: GeneratorFunction): DataGen = { + def setValueGen(f: GeneratorFunction): CommonDataGen = { userProvidedValueGen = Some(f) this } /** - * Set a NullGeneratorFunction for this column. This will not be used - * if the column is not nullable. + * Set a NullGeneratorFunction */ - def setNullGen(f: NullGeneratorFunction): DataGen = { + def setNullGen(f: NullGeneratorFunction): CommonDataGen = { this.userProvidedNullGen = Some(f) this } @@ -604,12 +627,12 @@ abstract class DataGen(var conf: ColumnConf, * Set the probability of a null appearing in the output. The probability should be * 0.0 to 1.0. */ - def setNullProbability(probability: Double): DataGen = { + def setNullProbability(probability: Double): CommonDataGen = { this.userProvidedNullGen = Some(NullProbabilityGenerationFunction(probability)) this } - def setNullProbabilityRecursively(probability: Double): DataGen = { + def setNullProbabilityRecursively(probability: Double): CommonDataGen = { this.userProvidedNullGen = Some(NullProbabilityGenerationFunction(probability)) children.foreach { case (_, dataGen) => @@ -621,7 +644,7 @@ abstract class DataGen(var conf: ColumnConf, /** * Set a specific location to seed mapping for the value generation. */ - def setSeedMapping(seedMapping: LocationToSeedMapping): DataGen = { + def setSeedMapping(seedMapping: LocationToSeedMapping): CommonDataGen = { this.seedMapping = seedMapping this } @@ -629,7 +652,7 @@ abstract class DataGen(var conf: ColumnConf, /** * Set a specific location to seed mapping for the null generation. */ - def setNullMapping(nullMapping: LocationToSeedMapping): DataGen = { + def setNullMapping(nullMapping: LocationToSeedMapping): CommonDataGen = { this.nullMapping = nullMapping this } @@ -638,7 +661,7 @@ abstract class DataGen(var conf: ColumnConf, * Set a specific LengthGeneratorFunction to use. This will only be used if * the datatype needs a length. */ - def setLengthGen(lengthGen: LengthGeneratorFunction): DataGen = { + def setLengthGen(lengthGen: LengthGeneratorFunction): CommonDataGen = { this.lengthGen = lengthGen this } @@ -646,25 +669,30 @@ abstract class DataGen(var conf: ColumnConf, /** * Set the length generation to be a fixed length. */ - def setLength(len: Int): DataGen = { + def setLength(len: Int): CommonDataGen = { this.lengthGen = FixedLengthGeneratorFunction(len) this } - def setLength(minLen: Int, maxLen: Int) = { + def setLength(minLen: Int, maxLen: Int): CommonDataGen = { this.lengthGen = VarLengthGeneratorFunction(minLen, maxLen) this } + def setGaussianLength(mean: Double, stdDev: Double): CommonDataGen = { + this.lengthGen = StdDevLengthGen(mean, stdDev) + this + } + /** * Add this column to a specific correlated key group. This should not be * called directly by users. 
*/ def setCorrelatedKeyGroup(keyGroup: Long, - minSeed: Long, maxSeed: Long, - seedMapping: LocationToSeedMapping): DataGen = { + minSeed: Long, maxSeed: Long, + seedMapping: LocationToSeedMapping): CommonDataGen = { conf = conf.forCorrelatedKeyGroup(keyGroup) - .forSeedRange(minSeed, maxSeed) + .forSeedRange(minSeed, maxSeed) this.seedMapping = seedMapping this } @@ -672,7 +700,7 @@ abstract class DataGen(var conf: ColumnConf, /** * Set a range of seed values that should be returned by the LocationToSeedMapping */ - def setSeedRange(min: Long, max: Long): DataGen = { + def setSeedRange(min: Long, max: Long): CommonDataGen = { conf = conf.forSeedRange(min, max) this } @@ -681,7 +709,7 @@ abstract class DataGen(var conf: ColumnConf, * Get the default value generator for this specific data gen. */ protected def getValGen: GeneratorFunction - def children: Seq[(String, DataGen)] + def children: Seq[(String, CommonDataGen)] /** * Get the final ready to use GeneratorFunction for the data generator. @@ -690,8 +718,8 @@ abstract class DataGen(var conf: ColumnConf, val sm = seedMapping.withColumnConf(conf) val lg = lengthGen.withLocationToSeedMapping(sm) var valGen = userProvidedValueGen.getOrElse(getValGen) - .withLocationToSeedMapping(sm) - .withLengthGeneratorFunction(lg) + .withLocationToSeedMapping(sm) + .withLengthGeneratorFunction(lg) valueRange.foreach { case (min, max) => valGen = valGen.withValueRange(min, max) @@ -700,35 +728,75 @@ abstract class DataGen(var conf: ColumnConf, val nullColConf = conf.forNulls val nm = nullMapping.withColumnConf(nullColConf) userProvidedNullGen.get - .withWrapped(valGen) - .withLocationToSeedMapping(nm) + .withWrapped(valGen) + .withLocationToSeedMapping(nm) } else { valGen } } - /** - * Get the data type for this column - */ - def dataType: DataType - /** * Is this column nullable or not. */ def nullable: Boolean = conf.nullable /** - * Get a child column for a given name, if it has one. + * Get a child for a given name, if it has one. */ - final def apply(name: String): DataGen = { + final def apply(name: String): CommonDataGen = { get(name).getOrElse{ throw new IllegalStateException(s"Could not find a child $name for $this") } } - def get(name: String): Option[DataGen] = None + def get(name: String): Option[CommonDataGen] = None +} + + +/** + * Base class for generating a column/sub-column. This holds configuration + * for the column, and handles what is needed to convert it into GeneratorFunction + */ +abstract class DataGen( + conf: ColumnConf, + defaultValueRange: Option[(Any, Any)], + seedMapping: LocationToSeedMapping = FlatDistribution(), + nullMapping: LocationToSeedMapping = FlatDistribution(), + lengthGen: LengthGeneratorFunction = FixedLengthGeneratorFunction(10)) extends + CommonDataGen(conf, defaultValueRange, seedMapping, nullMapping, lengthGen) { + + /** + * Get the data type for this column + */ + def dataType: DataType + + override def get(name: String): Option[DataGen] = None + + def getSubstringGen: Option[SubstringDataGen] = None + + def substringGen: SubstringDataGen = + getSubstringGen.getOrElse( + throw new IllegalArgumentException("substring data gen was not set")) + + def setSubstringGen(f : ColumnConf => SubstringDataGen): Unit = + setSubstringGen(Option(f(conf.forNextSubstring))) + + def setSubstringGen(subgen: Option[SubstringDataGen]): Unit = + throw new IllegalArgumentException("substring data gens can only be set for a STRING") } +/** + * Base class for generating a sub-string. 
This holds configuration + * for the substring, and handles what is needed to convert it into a GeneratorFunction + */ +abstract class SubstringDataGen( + conf: ColumnConf, + defaultValueRange: Option[(Any, Any)], + seedMapping: LocationToSeedMapping = FlatDistribution(), + nullMapping: LocationToSeedMapping = FlatDistribution(), + lengthGen: LengthGeneratorFunction = FixedLengthGeneratorFunction(10)) extends + CommonDataGen(conf, defaultValueRange, seedMapping, nullMapping, lengthGen) {} + /** * A special GeneratorFunction that just returns the computed seed. This is helpful for * debugging distributions or if you want long values without any abstraction in between. @@ -1494,155 +1562,866 @@ class FloatGen(conf: ColumnConf, defaultValueRange: Option[(Any, Any)]) override def children: Seq[(String, DataGen)] = Seq.empty } -trait JSONType { - def appendRandomValue(sb: StringBuilder, - index: Int, - maxStringLength: Int, - maxArrayLength: Int, - maxObjectLength: Int, - depth: Int, - maxDepth: Int, - r: Random): Unit -} +case class JsonPathElement(name: String, is_array: Boolean) +case class JsonLevel(path: Array[JsonPathElement], data_type: String, length: Int, value: String) {} + +object JsonColumnStats { + private def printHelp(): Unit = { + println("JSON Fingerprinting Tool:") + println("PARAMS: ") + println(" is a path to a Spark dataframe to read in") + println(" is a path in a Spark file system to write out fingerprint data to.") + println() + println("OPTIONS:") + println(" --json= where is the name of a top level String column") + println(" --anon= where is a SEED used to anonymize the JSON keys ") + println(" and column names.") + println(" --input_format= where is parquet or ORC. Defaults to parquet.") + println(" --overwrite to enable overwriting the fingerprint output.") + println(" --debug to enable some debug information to be printed out") + println(" --help to print out this help message") + println() + } + + def main(args: Array[String]): Unit = { + var inputPath = Option.empty[String] + var outputPath = Option.empty[String] + val jsonColumns = ArrayBuffer.empty[String] + var anonSeed = Option.empty[Long] + var debug = false + var argsDone = false + var format = "parquet" + var overwrite = false + + args.foreach { + case a if !argsDone && a.startsWith("--json=") => + jsonColumns += a.substring("--json=".length) + case a if !argsDone && a.startsWith("--anon=") => + anonSeed = Some(a.substring("--anon=".length).toLong) + case a if !argsDone && a.startsWith("--input_format=") => + format = a.substring("--input_format=".length).toLowerCase(java.util.Locale.US) + case "--overwrite" if !argsDone => + overwrite = true + case "--debug" if !argsDone => + debug = true + case "--help" if !argsDone => + printHelp() + System.exit(0) + case "--" if !argsDone => + argsDone = true + case a if !argsDone && a.startsWith("--") => // "--" was covered above already + println(s"ERROR $a is not a supported argument") + printHelp() + System.exit(-1) + case a if inputPath.isEmpty => + inputPath = Some(a) + case a if outputPath.isEmpty => + outputPath = Some(a) + case a => + println(s"ERROR only two arguments are supported. 
Found $a") + printHelp() + System.exit(-1) + } + if (outputPath.isEmpty) { + println("ERROR both an inputPath and an outputPath are required") + printHelp() + System.exit(-1) + } + + val spark = SparkSession.builder.getOrCreate() + spark.sparkContext.setLogLevel("WARN") + + val df = spark.read.format(format).load(inputPath.get) + jsonColumns.foreach { column => + val fp = fingerPrint(df, df(column), anonSeed) + val name = anonSeed.map(s => anonymizeString(column, s)).getOrElse(column) + val fullOutPath = s"${outputPath.get}/$name" + var writer = fp.write + if (overwrite) { + writer = writer.mode("overwrite") + } + if (debug) { + anonSeed.foreach { s => + println(s"Keys and columns will be anonymized with seed $s") + } + println(s"Writing $column fingerprint to $fullOutPath") + spark.time(writer.parquet(fullOutPath)) + println(s"Wrote ${spark.read.parquet(fullOutPath).count} rows") + spark.read.parquet(fullOutPath).show() + } else { + writer.parquet(fullOutPath) + } + } + } -object JSONType { - def selectType(depth: Int, - maxDepth: Int, - r: Random): JSONType = { - val toSelectFrom = if (depth < maxDepth) { - Seq(QuotedJSONString, JSONLong, JSONDouble, JSONArray, JSONObject) - } else { - Seq(QuotedJSONString, JSONLong, JSONDouble) - } - val index = r.nextInt(toSelectFrom.length) - toSelectFrom(index) - } -} - -object QuotedJSONString extends JSONType { - override def appendRandomValue(sb: StringBuilder, - index: Int, - maxStringLength: Int, - maxArrayLength: Int, - maxObjectLength: Int, - depth: Int, - maxDepth: Int, - r: Random): Unit = { - val strValue = r.nextString(r.nextInt(maxStringLength + 1)) - .replace("\\", "\\\\") - .replace("\"", "\\\"") - .replace("\n", "\\n") - .replace("\r", "\\r") - .replace("\b", "\\b") - .replace("\f", "\\f") - sb.append('"') - sb.append(strValue) - sb.append('"') - } -} - -object JSONLong extends JSONType { - override def appendRandomValue(sb: StringBuilder, - index: Int, - maxStringLength: Int, - maxArrayLength: Int, - maxObjectLength: Int, - depth: Int, - maxDepth: Int, - r: Random): Unit = { - sb.append(r.nextLong()) - } -} - -object JSONDouble extends JSONType { - override def appendRandomValue(sb: StringBuilder, - index: Int, - maxStringLength: Int, - maxArrayLength: Int, - maxObjectLength: Int, - depth: Int, - maxDepth: Int, - r: Random): Unit = { - sb.append(r.nextDouble() * 4096.0) - } -} - -object JSONArray extends JSONType { - override def appendRandomValue(sb: StringBuilder, - index: Int, - maxStringLength: Int, - maxArrayLength: Int, - maxObjectLength: Int, - depth: Int, - maxDepth: Int, - r: Random): Unit = { - val childType = JSONType.selectType(depth, maxDepth, r) - val length = r.nextInt(maxArrayLength + 1) - sb.append("[") + case class JsonNodeStats(count: Long, meanLen: Double, stdDevLength: Double, dc: Long) + + class JsonNode() { + private val forDataType = + mutable.HashMap[String, (JsonNodeStats, mutable.HashMap[String, JsonNode])]() + + def getChild(name: String, isArray: Boolean): JsonNode = { + val dt = if (isArray) { "ARRAY" } else { "OBJECT" } + val typed = forDataType.getOrElse(dt, + throw new IllegalArgumentException(s"$dt is not a set data type yet.")) + typed._2.getOrElse(name, + throw new IllegalArgumentException(s"$name is not a child when the type is $dt")) + } + + def contains(name: String, isArray: Boolean): Boolean = { + val dt = if (isArray) { "ARRAY" } else { "OBJECT" } + forDataType.get(dt).exists { children => + children._2.contains(name) + } + } + + def addChild(name: String, isArray: Boolean): JsonNode = { + val 
dt = if (isArray) { "ARRAY" } else { "OBJECT" } + val found = forDataType.getOrElse(dt, + throw new IllegalArgumentException(s"$dt was not already added as a data type")) + if (found._2.contains(name)) { + throw new IllegalArgumentException(s"$dt already has a child named $name") + } + val node = new JsonNode() + found._2.put(name, node) + node + } + + def addChoice(dt: String, stats: JsonNodeStats): Unit = { + if (forDataType.contains(dt)) { + throw new IllegalArgumentException(s"$dt was already added as a data type") + } + forDataType.put(dt, (stats, new mutable.HashMap[String, JsonNode]())) + } + + override def toString: String = { + forDataType.toString() + } + + def totalCount: Long = { + forDataType.values.map{ case (stats, _) => stats.count}.sum + } + + private def makeNoChoiceGenRecursive(dt: String, + children: mutable.HashMap[String, JsonNode], + cc: ColumnConf): (SubstringDataGen, ColumnConf) = { + var c = cc + val ret = dt match { + case "LONG" => new JSONLongGen(c) + case "DOUBLE" => new JSONDoubleGen(c) + case "BOOLEAN" => new JSONBoolGen(c) + case "NULL" => new JSONNullGen(false, c) + case "VALUE_NULL" => new JSONNullGen(true, c) + case "ERROR" => new JSONErrorGen(c) + case "STRING" => new JSONStringGen(c) + case "ARRAY" => + val child = if (children.isEmpty) { + // A corner case, we will just make it a BOOL column and it will be ignored + val tmp = new JSONBoolGen(c) + c = c.forNextSubstring + tmp + } else { + val tmp = children.values.head.makeGenRecursive(c) + c = tmp._2 + tmp._1 + } + new JSONArrayGen(child, c) + case "OBJECT" => + val childGens = if (children.isEmpty) { + Seq.empty + } else { + children.toSeq.map { + case (k, node) => + val tmp = node.makeGenRecursive(c) + c = tmp._2 + (k, tmp._1) + } + } + new JSONObjectGen(childGens, c) + case other => + throw new IllegalArgumentException(s"$other is not a leaf node type") + } + (ret, c.forNextSubstring) + } + + private def makeGenRecursive(cc: ColumnConf): (SubstringDataGen, ColumnConf) = { + var c = cc + // We are going to recursively walk the tree for all of the values. + if (forDataType.size == 1) { + // We don't need a choice at all. This makes it simpler.. + val (dt, (_, children)) = forDataType.head + makeNoChoiceGenRecursive(dt, children, c) + } else { + val totalSum = forDataType.map(f => f._2._1.count).sum.toDouble + var runningSum = 0L + val allChoices = ArrayBuffer[(Double, String, SubstringDataGen)]() + forDataType.foreach { + case (dt, (stats, children)) => + val tmp = makeNoChoiceGenRecursive(dt, children, c) + c = tmp._2 + runningSum += stats.count + allChoices.append((runningSum/totalSum, dt, tmp._1)) + } + + val ret = new JSONChoiceGen(allChoices.toSeq, c) + (ret, c.forNextSubstring) + } + } + + def makeGen(cc: ColumnConf): SubstringDataGen = { + val (ret, _) = makeGenRecursive(cc) + ret + } + + def setStatsSingle(dg: CommonDataGen, + dt: String, + stats: JsonNodeStats, + nullPct: Double): Unit = { + + val includeLength = dt != "OBJECT" && dt != "BOOLEAN" && dt != "NULL" && dt != "VALUE_NULL" + val includeNullPct = nullPct > 0.0 + if (includeLength) { + dg.setGaussianLength(stats.meanLen, stats.stdDevLength) + } + if (includeNullPct) { + dg.setNullProbability(nullPct) + } + dg.setSeedRange(1, stats.dc) + } + + def setStats(dg: CommonDataGen, + parentCount: Option[Long]): Unit = { + // We are going to recursively walk the tree... + if (forDataType.size == 1) { + // We don't need a choice at all. This makes it simpler.. 
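When a JSON path was observed with more than one data type, `makeGenRecursive` above turns the per-type counts into cumulative probabilities (`runningSum / totalSum`), and `JSONChoiceGenFunc` further down selects a child generator by scanning that list with a uniform draw. A self-contained sketch of the same selection rule, using made-up counts:

```scala
import scala.util.Random

// Made-up per-type counts observed for a single JSON path.
val counts = Seq("LONG" -> 700L, "STRING" -> 250L, "VALUE_NULL" -> 50L)
val total = counts.map(_._2).sum.toDouble

// Build cumulative weights, as makeGenRecursive does for allChoices.
var running = 0L
val cumulative = counts.map { case (dt, c) =>
  running += c
  (running / total, dt)
}

// Take the first entry whose cumulative weight covers the draw,
// mirroring the index walk in JSONChoiceGenFunc.
def pick(r: Random): String = {
  val d = r.nextDouble()
  cumulative.collectFirst { case (w, dt) if w >= d => dt }.getOrElse(counts.last._1)
}

val r = new Random(0L)
println(Seq.fill(20)(pick(r)).mkString(",")) // mostly LONG, occasionally STRING or VALUE_NULL
```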
+ val (dt, (stats, children)) = forDataType.head + val nullPct = parentCount.map { pc => + (pc - stats.count).toDouble/pc + }.getOrElse(0.0) + setStatsSingle(dg, dt, stats, nullPct) + val myCount = if (dt == "OBJECT") { + Some(totalCount) + } else { + None + } + children.foreach { + case (name, node) => + node.setStats(dg(name), myCount) + } + } else { + // We have choices to make between different types. + // The null percent cannot be calculated for each individual choice + // but is calculated on the group as a whole instead + parentCount.foreach { pc => + val tc = totalCount + val choiceNullPct = (pc - tc).toDouble / pc + if (choiceNullPct > 0.0) { + dg.setNullProbability(choiceNullPct) + } + } + forDataType.foreach { + case (dt, (stats, children)) => + // When there is a choice the name to access it is the data type + val choiceDg = dg(dt) + setStatsSingle(choiceDg, dt, stats, 0.0) + children.foreach { + case (name, node) => + val myCount = if (dt == "OBJECT") { + // Here we only want the count for the OBJECTs + Some(stats.count) + } else { + None + } + node.setStats(choiceDg(name), myCount) + } + } + } + } + } + + private lazy val jsonFactory = new JsonFactoryBuilder() + // The two options below enabled for Hive compatibility + .enable(JsonReadFeature.ALLOW_UNESCAPED_CONTROL_CHARS) + .enable(JsonReadFeature.ALLOW_SINGLE_QUOTES) + .build() + + private def processNext(parser: JsonParser, + currentPath: ArrayBuffer[JsonPathElement], + output: ArrayBuffer[JsonLevel]): Unit = { + parser.currentToken() match { + case JsonToken.START_OBJECT => + parser.nextToken() + while (parser.currentToken() != JsonToken.END_OBJECT) { + processNext(parser, currentPath, output) + } + output.append(JsonLevel(currentPath.toArray, "OBJECT", 0, "")) + parser.nextToken() + case JsonToken.START_ARRAY => + currentPath.append(JsonPathElement("data", is_array = true)) + parser.nextToken() + var length = 0 + while (parser.currentToken() != JsonToken.END_ARRAY) { + length += 1 + processNext(parser, currentPath, output) + } + currentPath.remove(currentPath.length - 1) + output.append(JsonLevel(currentPath.toArray, "ARRAY", length, "")) + parser.nextToken() + case JsonToken.FIELD_NAME => + currentPath.append(JsonPathElement(parser.getCurrentName, is_array = false)) + parser.nextToken() + processNext(parser, currentPath, output) + currentPath.remove(currentPath.length - 1) + case JsonToken.VALUE_NUMBER_INT => + val length = parser.getValueAsString.getBytes("UTF-8").length + output.append(JsonLevel(currentPath.toArray, "LONG", length, parser.getValueAsString)) + parser.nextToken() + case JsonToken.VALUE_NUMBER_FLOAT => + val length = parser.getValueAsString.getBytes("UTF-8").length + output.append(JsonLevel(currentPath.toArray, "DOUBLE", length, parser.getValueAsString)) + parser.nextToken() + case JsonToken.VALUE_TRUE | JsonToken.VALUE_FALSE => + val length = parser.getValueAsString.getBytes("UTF-8").length + output.append(JsonLevel(currentPath.toArray, "BOOLEAN", length, parser.getValueAsString)) + parser.nextToken() + case JsonToken.VALUE_NULL | null => + output.append(JsonLevel(currentPath.toArray, "VALUE_NULL", 4, "NULL")) + parser.nextToken() + case JsonToken.VALUE_STRING => + val length = parser.getValueAsString.getBytes("UTF-8").length + output.append(JsonLevel(currentPath.toArray, "STRING", length, parser.getValueAsString)) + parser.nextToken() + case other => + throw new IllegalStateException(s"DON'T KNOW HOW TO DEAL WITH $other") + } + } + + def jsonStatsUdf(json: String): Array[JsonLevel] = { + val output = 
new ArrayBuffer[JsonLevel]() + try { + val currentPath = new ArrayBuffer[JsonPathElement]() + if (json == null) { + output.append(JsonLevel(Array.empty, "NULL", 0, "")) + } else { + val parser = jsonFactory.createParser(json) + try { + parser.nextToken() + processNext(parser, currentPath, output) + } finally { + parser.close() + } + } + } catch { + case _: com.fasterxml.jackson.core.JsonParseException => + output.clear() + output.append(JsonLevel(Array.empty, "ERROR", json.getBytes("UTF-8").length, json)) + } + output.toArray + } + + private lazy val extractPaths = udf(json => jsonStatsUdf(json)) + + def anonymizeString(str: String, seed: Long): String = { + val length = str.length + val data = new Array[Byte](length) + val hash = XXH64.hashLong(str.hashCode, seed) + val r = new Random() + r.setSeed(hash) (0 until length).foreach { i => - if (i > 0) { - sb.append(",") + val tmp = r.nextInt(16) + data(i) = (tmp + 'A').toByte + } + new String(data) + } + + private lazy val anonPath = udf((str, seed) => anonymizeString(str, seed)) + + def anonymizeFingerPrint(df: DataFrame, anonSeed: Long): DataFrame = { + df.withColumn("tmp", transform(col("path"), + o => { + val name = o("name") + val isArray = o("is_array") + val anon = anonPath(name, lit(anonSeed)) + val newName = when(isArray, name).otherwise(anon).alias("name") + struct(newName, isArray) + })) + .drop("path").withColumnRenamed("tmp", "path") + .orderBy("path", "dt") + .selectExpr("path", "dt","c","mean_len","stddev_len","distinct","version") + } + + def fingerPrint(df: DataFrame, column: Column, anonymize: Option[Long] = None): DataFrame = { + val ret = df.select(extractPaths(column).alias("paths")) + .selectExpr("explode_outer(paths) as p") + .selectExpr("p.path as path", "p.data_type as dt", "p.length as len", "p.value as value") + .groupBy(col("path"), col("dt")).agg( + count(lit(1)).alias("c"), + avg(col("len")).alias("mean_len"), + coalesce(stddev(col("len")), lit(0.0)).alias("stddev_len"), + approx_count_distinct(col("value")).alias("distinct")) + .orderBy("path", "dt").withColumn("version", lit("0.1")) + .selectExpr("path", "dt","c","mean_len","stddev_len","distinct","version") + + anonymize.map { anonSeed => + anonymizeFingerPrint(ret, anonSeed) + }.getOrElse(ret) + } + + def apply(aggForColumn: DataFrame, genColumn: ColumnGen): Unit = + apply(aggForColumn, genColumn.dataGen) + + private val expectedSchema = StructType.fromDDL( + "path ARRAY>," + + "dt STRING," + + "c BIGINT," + + "mean_len DOUBLE," + + "stddev_len DOUBLE," + + "distinct BIGINT," + + "version STRING") + + def apply(aggForColumn: DataFrame, gen: DataGen): Unit = { + val aggData = aggForColumn.orderBy("path", "dt").collect() + val rootNode: JsonNode = new JsonNode() + assert(aggData.length > 0) + val schema = aggData.head.schema + assert(schema.length == expectedSchema.length) + schema.fields.zip(expectedSchema.fields).foreach { + case(found, expected) => + assert(found.name == expected.name) + // TODO we can worry about the exact types later if we need to + } + assert(aggData.head.getString(6) == "0.1") + aggData.foreach { row => + val fullPath = row.getAs[mutable.WrappedArray[Row]](0) + val parsedPath = fullPath.map(r => (r.getString(0), r.getBoolean(1))).toList + val dt = row.getString(1) + val count = row.getLong(2) + val meanLen = row.getDouble(3) + val stdLen = row.getDouble(4) + val dc = row.getLong(5) + + val stats = JsonNodeStats(count, meanLen, stdLen, dc) + var currentNode = rootNode + // Find everything up to the last path element + if 
(parsedPath.length > 1) { + parsedPath.slice(0, parsedPath.length - 1).foreach { + case (name, isArray) => + currentNode = currentNode.getChild(name, isArray) + } + } + + if (parsedPath.nonEmpty) { + // For the last path element (that is not the root element) we might need to add it + // as a child + val (name, isArray) = parsedPath.last + if (!currentNode.contains(name, isArray)) { + currentNode.addChild(name, isArray) + } + currentNode = currentNode.getChild(name, isArray) } - childType.appendRandomValue(sb, i, maxStringLength, maxArrayLength, maxObjectLength, - depth + 1, maxDepth, r) + currentNode.addChoice(dt, stats) } - sb.append("]") + + gen.setSubstringGen(cc => rootNode.makeGen(cc)) + rootNode.setStats(gen.substringGen, None) } } -object JSONObject extends JSONType { - override def appendRandomValue(sb: StringBuilder, - index: Int, - maxStringLength: Int, - maxArrayLength: Int, - maxObjectLength: Int, - depth: Int, - maxDepth: Int, - r: Random): Unit = { - val length = r.nextInt(maxObjectLength) + 1 - sb.append("{") - (0 until length).foreach { i => - if (i > 0) { - sb.append(",") + +case class JSONStringGenFunc(lengthGen: LengthGeneratorFunction = null, + mapping: LocationToSeedMapping = null) extends GeneratorFunction { + + override def apply(rowLoc: RowLocation): Any = { + val len = lengthGen(rowLoc) + val r = DataGen.getRandomFor(rowLoc, mapping) + val buffer = new Array[Byte](len) + var at = 0 + while (at < len) { + // Value range is 32 (Space) to 126 (~) + buffer(at) = (r.nextInt(126 - 31) + 32).toByte + at += 1 + } + val strVal = new String(buffer, 0, len) + .replace("\\", "\\\\") + .replace("\"", "\\\"") + .replace("\n", "\\n") + .replace("\r", "\\r") + .replace("\b", "\\b") + .replace("\f", "\\f") + '"' + strVal + '"' + } + + override def withLengthGeneratorFunction(lengthGen: LengthGeneratorFunction): JSONStringGenFunc = + JSONStringGenFunc(lengthGen, mapping) + + override def withLocationToSeedMapping(mapping: LocationToSeedMapping): JSONStringGenFunc = + JSONStringGenFunc(lengthGen, mapping) + + override def withValueRange(min: Any, max: Any): GeneratorFunction = + throw new IllegalArgumentException("value ranges are not supported for JSON") +} + +class JSONStringGen(conf: ColumnConf, + defaultValueRange: Option[(Any, Any)] = None) + extends SubstringDataGen(conf, defaultValueRange) { + + override protected def getValGen: GeneratorFunction = JSONStringGenFunc() + + override def children: Seq[(String, SubstringDataGen)] = Seq.empty +} + +case class JSONLongGenFunc(lengthGen: LengthGeneratorFunction = null, + mapping: LocationToSeedMapping = null) extends GeneratorFunction { + + override def apply(rowLoc: RowLocation): Any = { + val len = math.max(lengthGen(rowLoc), 1) // We need at least 1 long for a valid value + val r = DataGen.getRandomFor(rowLoc, mapping) + val buffer = new Array[Byte](len) + var at = 0 + while (at < len) { + if (at == 0) { + // No leading 0's + buffer(at) = (r.nextInt(9) + '1').toByte + } else { + buffer(at) = (r.nextInt(10) + '0').toByte } - sb.append("\"key_") - sb.append(i) - sb.append("_") - sb.append(depth ) - sb.append("\":") - val childType = JSONType.selectType(depth, maxDepth, r) - childType.appendRandomValue(sb, i, maxStringLength, maxArrayLength, maxObjectLength, - depth + 1, maxDepth, r) + at += 1 } - sb.append("}") + new String(buffer, 0, len) } + + override def withLengthGeneratorFunction(lengthGen: LengthGeneratorFunction): JSONLongGenFunc = + JSONLongGenFunc(lengthGen, mapping) + + override def withLocationToSeedMapping(mapping: 
LocationToSeedMapping): JSONLongGenFunc = + JSONLongGenFunc(lengthGen, mapping) + + override def withValueRange(min: Any, max: Any): GeneratorFunction = + throw new IllegalArgumentException("value ranges are not supported for JSON") } -case class JSONGenFunc( - maxStringLength: Int, - maxArrayLength: Int, - maxObjectLength: Int, - maxDepth: Int, - lengthGen: LengthGeneratorFunction = null, - mapping: LocationToSeedMapping = null) extends GeneratorFunction { +class JSONLongGen(conf: ColumnConf, + defaultValueRange: Option[(Any, Any)] = None) + extends SubstringDataGen(conf, defaultValueRange) { + + override protected def getValGen: GeneratorFunction = JSONLongGenFunc() + + override def children: Seq[(String, SubstringDataGen)] = Seq.empty +} + +case class JSONDoubleGenFunc(lengthGen: LengthGeneratorFunction = null, + mapping: LocationToSeedMapping = null) extends GeneratorFunction { override def apply(rowLoc: RowLocation): Any = { + val len = math.max(lengthGen(rowLoc), 3) // We have to have at least 3 chars NUM.NUM val r = DataGen.getRandomFor(rowLoc, mapping) - val sb = new StringBuilder() - JSONObject.appendRandomValue(sb, 0, maxStringLength, maxArrayLength, maxObjectLength, - 0, maxDepth, r) - // For now I am going to have some hard coded keys - UTF8String.fromString(sb.toString()) + val beforeLen = if (len == 3) { 1 } else { r.nextInt(len - 3) + 1 } + val buffer = new Array[Byte](len) + var at = 0 + while (at < len) { + if (at == 0) { + // No leading 0's + buffer(at) = (r.nextInt(9) + '1').toByte + } else if (at == beforeLen) { + buffer(at) = '.' + } else { + buffer(at) = (r.nextInt(10) + '0').toByte + } + at += 1 + } + UTF8String.fromBytes(buffer, 0, len) } - override def withLengthGeneratorFunction(lengthGen: LengthGeneratorFunction): GeneratorFunction = - JSONGenFunc(maxStringLength, maxArrayLength, maxObjectLength, maxDepth, lengthGen, mapping) + override def withLengthGeneratorFunction(lengthGen: LengthGeneratorFunction): JSONDoubleGenFunc = + JSONDoubleGenFunc(lengthGen, mapping) - override def withLocationToSeedMapping(mapping: LocationToSeedMapping): GeneratorFunction = - JSONGenFunc(maxStringLength, maxArrayLength, maxObjectLength, maxDepth, lengthGen, mapping) + override def withLocationToSeedMapping(mapping: LocationToSeedMapping): JSONDoubleGenFunc = + JSONDoubleGenFunc(lengthGen, mapping) override def withValueRange(min: Any, max: Any): GeneratorFunction = - throw new IllegalArgumentException("value ranges are not supported for strings") + throw new IllegalArgumentException("value ranges are not supported for JSON") +} + +class JSONDoubleGen(conf: ColumnConf, + defaultValueRange: Option[(Any, Any)] = None) + extends SubstringDataGen(conf, defaultValueRange) { + + override protected def getValGen: GeneratorFunction = JSONDoubleGenFunc() + + override def children: Seq[(String, SubstringDataGen)] = Seq.empty +} + +case class JSONBoolGenFunc(lengthGen: LengthGeneratorFunction = null, + mapping: LocationToSeedMapping = null) extends GeneratorFunction { + + override def apply(rowLoc: RowLocation): Any = { + val r = DataGen.getRandomFor(rowLoc, mapping) + val ret = if (r.nextBoolean()) "true" else "false" + UTF8String.fromString(ret) + } + + override def withLengthGeneratorFunction(lengthGen: LengthGeneratorFunction): JSONBoolGenFunc = + JSONBoolGenFunc(lengthGen, mapping) + + override def withLocationToSeedMapping(mapping: LocationToSeedMapping): JSONBoolGenFunc = + JSONBoolGenFunc(lengthGen, mapping) + + override def withValueRange(min: Any, max: Any): GeneratorFunction = + 
throw new IllegalArgumentException("value ranges are not supported for JSON") +} + +class JSONBoolGen(conf: ColumnConf, + defaultValueRange: Option[(Any, Any)] = None) + extends SubstringDataGen(conf, defaultValueRange) { + + override protected def getValGen: GeneratorFunction = JSONBoolGenFunc() + + override def children: Seq[(String, SubstringDataGen)] = Seq.empty +} + +case class JSONNullGenFunc(nullAsString: Boolean, + lengthGen: LengthGeneratorFunction = null, + mapping: LocationToSeedMapping = null) extends GeneratorFunction { + + override def apply(rowLoc: RowLocation): Any = + if (nullAsString) { + UTF8String.fromString("null") + } else { + null + } + + + override def withLengthGeneratorFunction(lengthGen: LengthGeneratorFunction): JSONNullGenFunc = + JSONNullGenFunc(nullAsString, lengthGen, mapping) + + override def withLocationToSeedMapping(mapping: LocationToSeedMapping): JSONNullGenFunc = + JSONNullGenFunc(nullAsString, lengthGen, mapping) + + override def withValueRange(min: Any, max: Any): GeneratorFunction = + throw new IllegalArgumentException("value ranges are not supported for JSON") +} + +class JSONNullGen(nullAsString: Boolean, + conf: ColumnConf, + defaultValueRange: Option[(Any, Any)] = None) + extends SubstringDataGen(conf, defaultValueRange) { + + override protected def getValGen: GeneratorFunction = JSONNullGenFunc(nullAsString) + + override def children: Seq[(String, SubstringDataGen)] = Seq.empty +} + +case class JSONErrorGenFunc(lengthGen: LengthGeneratorFunction = null, + mapping: LocationToSeedMapping = null) extends GeneratorFunction { + + override def apply(rowLoc: RowLocation): Any = { + val len = lengthGen(rowLoc) + val r = DataGen.getRandomFor(rowLoc, mapping) + val buffer = new Array[Byte](len) + var at = 0 + while (at < len) { + // Value range is 32 (Space) to 126 (~) + // But it is almost impossible to show up as valid JSON + buffer(at) = (r.nextInt(126 - 31) + 32).toByte + at += 1 + } + UTF8String.fromBytes(buffer, 0, len) + } + + override def withLengthGeneratorFunction(lengthGen: LengthGeneratorFunction): JSONErrorGenFunc = + JSONErrorGenFunc(lengthGen, mapping) + + override def withLocationToSeedMapping(mapping: LocationToSeedMapping): JSONErrorGenFunc = + JSONErrorGenFunc(lengthGen, mapping) + + override def withValueRange(min: Any, max: Any): GeneratorFunction = + throw new IllegalArgumentException("value ranges are not supported for JSON") +} + +class JSONErrorGen(conf: ColumnConf, + defaultValueRange: Option[(Any, Any)] = None) + extends SubstringDataGen(conf, defaultValueRange) { + + override protected def getValGen: GeneratorFunction = JSONErrorGenFunc() + + override def children: Seq[(String, SubstringDataGen)] = Seq.empty +} + +case class JSONArrayGenFunc(child: GeneratorFunction, + lengthGen: LengthGeneratorFunction = null, + mapping: LocationToSeedMapping = null) extends GeneratorFunction { + + override def apply(rowLoc: RowLocation): Any = { + val len = lengthGen(rowLoc) + val data = new Array[String](len) + val childRowLoc = rowLoc.withNewChild() + var i = 0 + while (i < len) { + childRowLoc.setLastChildIndex(i) + val v = child(childRowLoc) + if (v == null) { + // A null in an array must look like "null" + data(i) = "null" + } else { + data(i) = v.toString + } + i += 1 + } + val ret = data.mkString("[", ",", "]") + UTF8String.fromString(ret) + } + + override def withLengthGeneratorFunction(lengthGen: LengthGeneratorFunction): JSONArrayGenFunc = + JSONArrayGenFunc(child, lengthGen, mapping) + + override def 
withLocationToSeedMapping(mapping: LocationToSeedMapping): JSONArrayGenFunc = + JSONArrayGenFunc(child, lengthGen, mapping) + + override def withValueRange(min: Any, max: Any): GeneratorFunction = + throw new IllegalArgumentException("value ranges are not supported for JSON") +} + +class JSONArrayGen(child: SubstringDataGen, + conf: ColumnConf, + defaultValueRange: Option[(Any, Any)] = None) + extends SubstringDataGen(conf, defaultValueRange) { + + override def setCorrelatedKeyGroup(keyGroup: Long, + minSeed: Long, maxSeed: Long, + seedMapping: LocationToSeedMapping): SubstringDataGen = { + super.setCorrelatedKeyGroup(keyGroup, minSeed, maxSeed, seedMapping) + child.setCorrelatedKeyGroup(keyGroup, minSeed, maxSeed, seedMapping) + this + } + + override protected def getValGen: GeneratorFunction = JSONArrayGenFunc(child.getGen) + + override def get(name: String): Option[SubstringDataGen] = { + if ("data".equalsIgnoreCase(name) || "child".equalsIgnoreCase(name)) { + Some(child) + } else { + None + } + } + + override def children: Seq[(String, SubstringDataGen)] = Seq(("data", child)) +} + +case class JSONObjectGenFunc(childGens: Array[(String, GeneratorFunction)], + lengthGen: LengthGeneratorFunction = null, + mapping: LocationToSeedMapping = null) extends GeneratorFunction { + override def apply(rowLoc: RowLocation): Any = { + // TODO randomize the order of the children??? + // TODO duplicate child values??? + // The row location does not change for a struct/object + val data = childGens.map { + case (k, gen) => + val key = k.replace("\\", "\\\\") + .replace("\"", "\\\"") + .replace("\n", "\\n") + .replace("\r", "\\r") + .replace("\b", "\\b") + .replace("\f", "\\f") + val v = gen.apply(rowLoc) + if (v == null) { + "" + } else { + '"' + key + "\":" + v + } + } + val ret = data.filterNot(_.isEmpty).mkString("{",",","}") + UTF8String.fromString(ret) + } + + override def withLocationToSeedMapping(mapping: LocationToSeedMapping): JSONObjectGenFunc = + JSONObjectGenFunc(childGens, lengthGen, mapping) + + override def withLengthGeneratorFunction(lengthGen: LengthGeneratorFunction): JSONObjectGenFunc = + JSONObjectGenFunc(childGens, lengthGen, mapping) + + override def withValueRange(min: Any, max: Any): GeneratorFunction = + throw new IllegalArgumentException("value ranges are not supported for JSON") +} + +class JSONObjectGen(val children: Seq[(String, SubstringDataGen)], + conf: ColumnConf, + defaultValueRange: Option[(Any, Any)] = None) + extends SubstringDataGen(conf, defaultValueRange) { + + override def setCorrelatedKeyGroup(keyGroup: Long, + minSeed: Long, maxSeed: Long, + seedMapping: LocationToSeedMapping): SubstringDataGen = { + super.setCorrelatedKeyGroup(keyGroup, minSeed, maxSeed, seedMapping) + children.foreach { + case (_, gen) => + gen.setCorrelatedKeyGroup(keyGroup, minSeed, maxSeed, seedMapping) + } + this + } + + override def get(name: String): Option[SubstringDataGen] = + children.collectFirst { + case (childName, dataGen) if childName.equalsIgnoreCase(name) => dataGen + } + + override protected def getValGen: GeneratorFunction = { + val childGens = children.map(c => (c._1, c._2.getGen)).toArray + JSONObjectGenFunc(childGens) + } +} + +case class JSONChoiceGenFunc(choices: List[(Double, GeneratorFunction)], + lengthGen: LengthGeneratorFunction = null, + mapping: LocationToSeedMapping = null) extends GeneratorFunction { + override def apply(rowLoc: RowLocation): Any = { + val r = DataGen.getRandomFor(rowLoc, mapping) + val l = r.nextDouble() + var index = 0 + while 
(choices(index)._1 < l) { + index += 1 + } + val childRowLoc = rowLoc.withNewChild() + choices(index)._2(childRowLoc) + } + + override def withLengthGeneratorFunction(lengthGen: LengthGeneratorFunction): JSONChoiceGenFunc = + JSONChoiceGenFunc(choices, lengthGen, mapping) + + override def withLocationToSeedMapping(mapping: LocationToSeedMapping): JSONChoiceGenFunc = + JSONChoiceGenFunc(choices, lengthGen, mapping) + + override def withValueRange(min: Any, max: Any): GeneratorFunction = + throw new IllegalArgumentException("value ranges are not supported for JSON") +} + +class JSONChoiceGen(val choices: Seq[(Double, String, SubstringDataGen)], + conf: ColumnConf, + defaultValueRange: Option[(Any, Any)] = None) + extends SubstringDataGen(conf, defaultValueRange) { + + override val children: Seq[(String, SubstringDataGen)] = + choices.map { case (_, name, gen) => (name, gen) } + + override def setCorrelatedKeyGroup(keyGroup: Long, + minSeed: Long, maxSeed: Long, + seedMapping: LocationToSeedMapping): SubstringDataGen = { + super.setCorrelatedKeyGroup(keyGroup, minSeed, maxSeed, seedMapping) + children.foreach { + case (_, gen) => + gen.setCorrelatedKeyGroup(keyGroup, minSeed, maxSeed, seedMapping) + } + this + } + + override def get(name: String): Option[SubstringDataGen] = + children.collectFirst { + case (childName, dataGen) if childName.equalsIgnoreCase(name) => dataGen + } + + override protected def getValGen: GeneratorFunction = { + val childGens = choices.map(c => (c._1, c._3.getGen)).toList + JSONChoiceGenFunc(childGens) + } } case class ASCIIGenFunc( @@ -1672,14 +2451,46 @@ case class ASCIIGenFunc( throw new IllegalArgumentException("value ranges are not supported for strings") } -class StringGen(conf: ColumnConf, defaultValueRange: Option[(Any, Any)]) - extends DataGen(conf, defaultValueRange) { +/** + * This is here to wrap the substring gen function so that its length/settings + * are the ones used when generating a string, and not what was set for the string. + */ +case class SubstringGenFunc( + substringGen: GeneratorFunction, + lengthGen: LengthGeneratorFunction = null, + mapping: LocationToSeedMapping = null) extends GeneratorFunction { + + override def apply(rowLoc: RowLocation): Any = { + substringGen(rowLoc) + } + + // The length and location seed mapping are just ignored for this... 
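Taken together, the intended workflow for the fingerprinting support added in this patch is: run `JsonColumnStats.fingerPrint` once against real data, persist the resulting statistics, and later hand them to `JsonColumnStats.apply` so a string column generates JSON with matching shape, lengths, and null rates. A hedged sketch of that flow; the paths, the `payload` column name, and the `payloadCol` parameter are illustrative assumptions, and the `ColumnGen` must wrap a string column because only `StringGen` accepts a substring generator:

```scala
import org.apache.spark.sql.SparkSession

// Sketch: fingerprint a JSON column once, then reuse the stats when generating data.
def fingerprintAndApply(payloadCol: ColumnGen): Unit = {
  val spark = SparkSession.builder.getOrCreate()

  // 1) Fingerprint an existing JSON string column and persist the statistics.
  val source = spark.read.parquet("/data/events")                 // illustrative input path
  val fp = JsonColumnStats.fingerPrint(source, source("payload")) // "payload" column is assumed
  fp.write.mode("overwrite").parquet("/fingerprints/payload")     // illustrative output path

  // 2) Load the statistics and attach them to the generator column. apply() builds
  //    the substring generator tree and sets lengths, null probabilities, and seed
  //    ranges from the collected stats.
  val agg = spark.read.parquet("/fingerprints/payload")
  JsonColumnStats(agg, payloadCol)
}
```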
+ override def withLengthGeneratorFunction(lengthGen: LengthGeneratorFunction): GeneratorFunction = + this + + override def withLocationToSeedMapping(mapping: LocationToSeedMapping): GeneratorFunction = + this + + override def withValueRange(min: Any, max: Any): GeneratorFunction = + throw new IllegalArgumentException("value ranges are not supported for strings") +} + +class StringGen(conf: ColumnConf, + defaultValueRange: Option[(Any, Any)], + var substringDataGen: Option[SubstringDataGen] = None) + extends DataGen(conf, defaultValueRange) { override def dataType: DataType = StringType - override protected def getValGen: GeneratorFunction = ASCIIGenFunc() + override protected def getValGen: GeneratorFunction = + substringDataGen.map(s => SubstringGenFunc(s.getGen)).getOrElse(ASCIIGenFunc()) override def children: Seq[(String, DataGen)] = Seq.empty + + override def setSubstringGen(subgen: Option[SubstringDataGen]): Unit = + substringDataGen = subgen + + override def getSubstringGen: Option[SubstringDataGen] = substringDataGen } case class StructGenFunc(childGens: Array[GeneratorFunction]) extends GeneratorFunction { @@ -1854,7 +2665,6 @@ class MapGen(key: DataGen, override def children: Seq[(String, DataGen)] = Seq(("key", key), ("value", value)) } - object ColumnGen { private def genInternal(rowNumber: Column, dataType: DataType, @@ -1869,8 +2679,8 @@ object ColumnGen { */ class ColumnGen(val dataGen: DataGen) { def setCorrelatedKeyGroup(kg: Long, - minSeed: Long, maxSeed: Long, - seedMapping: LocationToSeedMapping): ColumnGen = { + minSeed: Long, maxSeed: Long, + seedMapping: LocationToSeedMapping): ColumnGen = { dataGen.setCorrelatedKeyGroup(kg, minSeed, maxSeed, seedMapping) this } @@ -1930,6 +2740,11 @@ class ColumnGen(val dataGen: DataGen) { this } + def setGaussianLength(mean: Double, stdDev: Double): ColumnGen = { + dataGen.setGaussianLength(mean, stdDev) + this + } + final def apply(name: String): DataGen = { get(name).getOrElse { throw new IllegalArgumentException(s"$name not a child of $this") @@ -1941,8 +2756,16 @@ class ColumnGen(val dataGen: DataGen) { def gen(rowNumber: Column): Column = { ColumnGen.genInternal(rowNumber, dataGen.dataType, dataGen.nullable, dataGen.getGen) } + + def getSubstring: Option[SubstringDataGen] = dataGen.getSubstringGen + + def substringGen: SubstringDataGen = dataGen.substringGen + + def setSubstringGen(f : ColumnConf => SubstringDataGen): Unit = + dataGen.setSubstringGen(f) } + sealed trait KeyGroupType /** @@ -2192,7 +3015,7 @@ object DBGen { numRows: Long, mapping: OrderedTypeMapping): Seq[(String, ColumnGen)] = { // a bit of a hack with the column num so that we update it before each time... - var conf = ColumnConf(ColumnLocation(tableId, -1), true, numRows) + var conf = ColumnConf(ColumnLocation(tableId, -1, 0), true, numRows) st.toArray.map { sf => if (!mapping.canMap(sf.dataType, mapping)) { throw new IllegalArgumentException(s"$sf is not supported at this time") diff --git a/dist/maven-antrun/build-parallel-worlds.xml b/dist/maven-antrun/build-parallel-worlds.xml index 524b15addf9..07838616340 100644 --- a/dist/maven-antrun/build-parallel-worlds.xml +++ b/dist/maven-antrun/build-parallel-worlds.xml @@ -1,6 +1,6 @@ - diff --git a/dist/scripts/binary-dedupe.sh b/dist/scripts/binary-dedupe.sh index 183e86b1524..356b0b4dbae 100755 --- a/dist/scripts/binary-dedupe.sh +++ b/dist/scripts/binary-dedupe.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -34,10 +34,10 @@ case "$OSTYPE" in esac STEP=0 -export SPARK3XX_COMMON_TXT="$PWD/spark3xx-common.txt" -export SPARK3XX_COMMON_COPY_LIST="$PWD/spark-common-copy-list.txt" +export SPARK_SHARED_TXT="$PWD/spark-shared.txt" +export SPARK_SHARED_COPY_LIST="$PWD/spark-shared-copy-list.txt" export DELETE_DUPLICATES_TXT="$PWD/delete-duplicates.txt" -export SPARK3XX_COMMON_DIR="$PWD/spark3xx-common" +export SPARK_SHARED_DIR="$PWD/spark-shared" # This script de-duplicates .class files at the binary level. # We could also diff classes using scalap / javap outputs. @@ -47,17 +47,17 @@ export SPARK3XX_COMMON_DIR="$PWD/spark3xx-common" # The following pipeline determines identical classes across shims in this build. # - checksum all class files -# - move the varying-prefix spark3xy to the left so it can be easily skipped for uniq and sort +# - move the varying-prefix sparkxyz to the left so it can be easily skipped for uniq and sort # - sort by path, secondary sort by checksum, print one line per group # - produce uniq count for paths # - filter the paths with count=1, the class files without diverging checksums -# - put the path starting with /spark3xy back together for the final list +# - put the path starting with /sparkxyz back together for the final list echo "Retrieving class files hashing to a single value ..." echo "$((++STEP))/ SHA1 of all non-META files > tmp-sha1-files.txt" -find ./parallel-world/spark3* -name META-INF -prune -o \( -type f -print \) | \ - xargs $SHASUM > tmp-sha1-files.txt +find ./parallel-world/spark[34]* -name META-INF -prune -o -name webapps -prune -o \( -type f -print0 \) | \ + xargs --null $SHASUM > tmp-sha1-files.txt echo "$((++STEP))/ make shim column 1 > tmp-shim-sha-package-files.txt" < tmp-sha1-files.txt awk -F/ '$1=$1' | \ @@ -68,10 +68,10 @@ echo "$((++STEP))/ sort by path, sha1; output first from each group > tmp-count- sort -k3 -k2,2 -u tmp-shim-sha-package-files.txt | \ uniq -f 2 -c > tmp-count-shim-sha-package-files.txt -echo "$((++STEP))/ files with unique sha1 > $SPARK3XX_COMMON_TXT" +echo "$((++STEP))/ files with unique sha1 > $SPARK_SHARED_TXT" grep '^\s\+1 .*' tmp-count-shim-sha-package-files.txt | \ awk '{$1=""; $3=""; print $0 }' | \ - tr -s ' ' | sed 's/\ /\//g' > "$SPARK3XX_COMMON_TXT" + tr -s ' ' | sed 's/\ /\//g' > "$SPARK_SHARED_TXT" function retain_single_copy() { set -e @@ -93,10 +93,10 @@ function retain_single_copy() { package_class="${package_class_with_spaces// //}" # get the reference copy out of the way - echo "$package_class" >> "from-$shim-to-spark3xx-common.txt" + echo "$package_class" >> "from-$shim-to-spark-shared.txt" # expanding directories separately because full path # glob is broken for class file name including the "$" character - for pw in ./parallel-world/spark3* ; do + for pw in ./parallel-world/spark[34]* ; do delete_path="$pw/$package_class" [[ -f "$delete_path" ]] && echo "$delete_path" || true done >> "$DELETE_DUPLICATES_TXT" || exit 255 @@ -106,26 +106,26 @@ function retain_single_copy() { # standalone debugging # truncate incremental files : > "$DELETE_DUPLICATES_TXT" -rm -f from-spark3*-to-spark3xx-common.txt -rm -rf "$SPARK3XX_COMMON_DIR" -mkdir -p "$SPARK3XX_COMMON_DIR" +rm -f from-spark[34]*-to-spark-shared.txt +rm -rf "$SPARK_SHARED_DIR" +mkdir -p "$SPARK_SHARED_DIR" -echo "$((++STEP))/ retaining a single copy of spark3xx-common classes" +echo "$((++STEP))/ retaining a single copy 
of spark-shared classes" while read spark_common_class; do retain_single_copy "$spark_common_class" -done < "$SPARK3XX_COMMON_TXT" +done < "$SPARK_SHARED_TXT" -echo "$((++STEP))/ rsyncing common classes to $SPARK3XX_COMMON_DIR" -for copy_list in from-spark3*-to-spark3xx-common.txt; do +echo "$((++STEP))/ rsyncing common classes to $SPARK_SHARED_DIR" +for copy_list in from-spark[34]*-to-spark-shared.txt; do echo Initializing rsync of "$copy_list" IFS='-' <<< "$copy_list" read -ra copy_list_parts # declare -p copy_list_parts shim="${copy_list_parts[1]}" # use rsync to reduce process forking - rsync --files-from="$copy_list" ./parallel-world/"$shim" "$SPARK3XX_COMMON_DIR" + rsync --files-from="$copy_list" ./parallel-world/"$shim" "$SPARK_SHARED_DIR" done -mv "$SPARK3XX_COMMON_DIR" parallel-world/ +mv "$SPARK_SHARED_DIR" parallel-world/ # TODO further dedupe by FEATURE version lines: # spark30x-common @@ -137,9 +137,9 @@ mv "$SPARK3XX_COMMON_DIR" parallel-world/ # # At this point the duplicate classes have not been removed from version-specific jar # locations such as parallel-world/spark312. -# For each unshimmed class file look for all of its copies inside /spark3* and +# For each unshimmed class file look for all of its copies inside /spark[34]* and # and count the number of distinct checksums. There are two representative cases -# 1) The class is contributed to the unshimmed location via the unshimmed-from-each-spark3xx list. These are classes +# 1) The class is contributed to the unshimmed location via the unshimmed-from-each-spark34 list. These are classes # carrying the shim classifier in their package name such as # com.nvidia.spark.rapids.spark312.RapidsShuffleManager. They are unique by construction, # and will have zero copies in any non-spark312 shims. Although such classes are currently excluded from @@ -157,25 +157,25 @@ mv "$SPARK3XX_COMMON_DIR" parallel-world/ # Determine the list of unshimmed class files UNSHIMMED_LIST_TXT=unshimmed-result.txt echo "$((++STEP))/ creating sorted list of unshimmed classes > $UNSHIMMED_LIST_TXT" -find ./parallel-world -name '*.class' -not -path './parallel-world/spark3*' | \ +find ./parallel-world -name '*.class' -not -path './parallel-world/spark[34-]*' | \ cut -d/ -f 3- | sort > "$UNSHIMMED_LIST_TXT" function verify_same_sha_for_unshimmed() { set -e class_file="$1" - # the raw spark3xx-common.txt file list contains all single-sha1 classes + # the raw spark-shared.txt file list contains all single-sha1 classes # including the ones that are unshimmed. Instead of expensively recomputing # sha1 look up if there is an entry with the unshimmed class as a suffix class_file_quoted=$(printf '%q' "$class_file") - # TODO currently RapidsShuffleManager is "removed" from /spark3* by construction in + # TODO currently RapidsShuffleManager is "removed" from /spark* by construction in # dist pom.xml via ant. We could delegate this logic to this script # and make both simmpler - if [[ ! "$class_file_quoted" =~ (com/nvidia/spark/rapids/spark3.*/.*ShuffleManager.class|org/apache/spark/sql/rapids/shims/spark3.*/ProxyRapidsShuffleInternalManager.class) ]]; then + if [[ ! "$class_file_quoted" =~ (com/nvidia/spark/rapids/spark[34].*/.*ShuffleManager.class|org/apache/spark/sql/rapids/shims/spark[34].*/ProxyRapidsShuffleInternalManager.class) ]]; then - if ! grep -q "/spark.\+/$class_file_quoted" "$SPARK3XX_COMMON_TXT"; then + if ! 
grep -q "/spark.\+/$class_file_quoted" "$SPARK_SHARED_TXT"; then echo >&2 "$class_file is not bitwise-identical across shims" exit 255 fi @@ -192,7 +192,7 @@ done < "$UNSHIMMED_LIST_TXT" echo "$((++STEP))/ removing duplicates of unshimmed classes" while read unshimmed_class; do - for pw in ./parallel-world/spark3* ; do + for pw in ./parallel-world/spark[34]* ; do unshimmed_path="$pw/$unshimmed_class" [[ -f "$unshimmed_path" ]] && echo "$unshimmed_path" || true done >> "$DELETE_DUPLICATES_TXT" diff --git a/docs/archive.md b/docs/archive.md index 6cce30557f4..f4eeab11a40 100644 --- a/docs/archive.md +++ b/docs/archive.md @@ -5,11 +5,143 @@ nav_order: 15 --- Below are archived releases for RAPIDS Accelerator for Apache Spark. +## Release v24.04.1 +### Hardware Requirements: + +The plugin is tested on the following architectures: + + GPU Models: NVIDIA V100, T4, A10/A100, L4 and H100 GPUs + +### Software Requirements: + + OS: Ubuntu 20.04, Ubuntu 22.04, CentOS 7, or Rocky Linux 8 + + NVIDIA Driver*: R470+ + + Runtime: + Scala 2.12, 2.13 + Python, Java Virtual Machine (JVM) compatible with your spark-version. + + * Check the Spark documentation for Python and Java version compatibility with your specific + Spark version. For instance, visit `https://spark.apache.org/docs/3.4.1` for Spark 3.4.1. + + Supported Spark versions: + Apache Spark 3.2.0, 3.2.1, 3.2.2, 3.2.3, 3.2.4 + Apache Spark 3.3.0, 3.3.1, 3.3.2, 3.3.3, 3.3.4 + Apache Spark 3.4.0, 3.4.1, 3.4.2 + Apache Spark 3.5.0, 3.5.1 + + Supported Databricks runtime versions for Azure and AWS: + Databricks 11.3 ML LTS (GPU, Scala 2.12, Spark 3.3.0) + Databricks 12.2 ML LTS (GPU, Scala 2.12, Spark 3.3.2) + Databricks 13.3 ML LTS (GPU, Scala 2.12, Spark 3.4.1) + + Supported Dataproc versions (Debian/Ubuntu): + GCP Dataproc 2.0 + GCP Dataproc 2.1 + + Supported Dataproc Serverless versions: + Spark runtime 1.1 LTS + Spark runtime 2.0 + Spark runtime 2.1 + +*Some hardware may have a minimum driver version greater than R470. Check the GPU spec sheet +for your hardware's minimum driver version. + +*For Cloudera and EMR support, please refer to the +[Distributions](https://docs.nvidia.com/spark-rapids/user-guide/latest/faq.html#which-distributions-are-supported) section of the FAQ. 
+ +### RAPIDS Accelerator's Support Policy for Apache Spark +The RAPIDS Accelerator maintains support for Apache Spark versions available for download from [Apache Spark](https://spark.apache.org/downloads.html) + +### Download RAPIDS Accelerator for Apache Spark v24.04.1 + +| Processor | Scala Version | Download Jar | Download Signature | +|-----------|---------------|--------------|--------------------| +| x86_64 | Scala 2.12 | [RAPIDS Accelerator v24.04.1](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.04.1/rapids-4-spark_2.12-24.04.1.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.04.1/rapids-4-spark_2.12-24.04.1.jar.asc) | +| x86_64 | Scala 2.13 | [RAPIDS Accelerator v24.04.1](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/24.04.1/rapids-4-spark_2.13-24.04.1.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/24.04.1/rapids-4-spark_2.13-24.04.1.jar.asc) | +| arm64 | Scala 2.12 | [RAPIDS Accelerator v24.04.1](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.04.1/rapids-4-spark_2.12-24.04.1-cuda11-arm64.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.04.1/rapids-4-spark_2.12-24.04.1-cuda11-arm64.jar.asc) | +| arm64 | Scala 2.13 | [RAPIDS Accelerator v24.04.1](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/24.04.1/rapids-4-spark_2.13-24.04.1-cuda11-arm64.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/24.04.1/rapids-4-spark_2.13-24.04.1-cuda11-arm64.jar.asc) | + +This package is built against CUDA 11.8. It is tested on V100, T4, A10, A100, L4 and H100 GPUs with +CUDA 11.8 through CUDA 12.0. + +### Verify signature +* Download the [PUB_KEY](https://keys.openpgp.org/search?q=sw-spark@nvidia.com). +* Import the public key: `gpg --import PUB_KEY` +* Verify the signature for Scala 2.12 jar: + `gpg --verify rapids-4-spark_2.12-24.04.1.jar.asc rapids-4-spark_2.12-24.04.1.jar` +* Verify the signature for Scala 2.13 jar: + `gpg --verify rapids-4-spark_2.13-24.04.1.jar.asc rapids-4-spark_2.13-24.04.1.jar` + +The output of signature verify: + + gpg: Good signature from "NVIDIA Spark (For the signature of spark-rapids release jars) " + +### Release Notes +* New functionality and performance improvements for this release include: +* Performance improvements for S3 reading. +Refer to perfio.s3.enabled in [advanced_configs](./additional-functionality/advanced_configs.md) for more details. +* Performance improvements when doing a joins on unique keys. +* Enhanced decompression kernels for zstd and snappy. +* Enhanced Parquet reading performance with modular kernels. +* Added compatibility with Spark version 3.5.1. +* Deprecated support for Databricks 10.4 ML LTS. +* For updates on RAPIDS Accelerator Tools, please visit [this link](https://github.com/NVIDIA/spark-rapids-tools/releases). + +For a detailed list of changes, please refer to the +[CHANGELOG](https://github.com/NVIDIA/spark-rapids/blob/main/CHANGELOG.md). + +## Archived releases + +As new releases come out, previous ones will still be available in [archived releases](./archive.md). + ## Release v24.04.0 ### Hardware Requirements: The plugin is tested on the following architectures: - @@ -67,14 +67,14 @@ for your hardware's minimum driver version. 
+ + GPU Models: NVIDIA V100, T4, A10/A100, L4 and H100 GPUs + +### Software Requirements: + + OS: Ubuntu 20.04, Ubuntu 22.04, CentOS 7, or Rocky Linux 8 + + NVIDIA Driver*: R470+ + + Runtime: + Scala 2.12, 2.13 + Python, Java Virtual Machine (JVM) compatible with your spark-version. + + * Check the Spark documentation for Python and Java version compatibility with your specific + Spark version. For instance, visit `https://spark.apache.org/docs/3.4.1` for Spark 3.4.1. + + Supported Spark versions: + Apache Spark 3.2.0, 3.2.1, 3.2.2, 3.2.3, 3.2.4 + Apache Spark 3.3.0, 3.3.1, 3.3.2, 3.3.3, 3.3.4 + Apache Spark 3.4.0, 3.4.1, 3.4.2 + Apache Spark 3.5.0, 3.5.1 + + Supported Databricks runtime versions for Azure and AWS: + Databricks 11.3 ML LTS (GPU, Scala 2.12, Spark 3.3.0) + Databricks 12.2 ML LTS (GPU, Scala 2.12, Spark 3.3.2) + Databricks 13.3 ML LTS (GPU, Scala 2.12, Spark 3.4.1) + + Supported Dataproc versions (Debian/Ubuntu): + GCP Dataproc 2.0 + GCP Dataproc 2.1 + + Supported Dataproc Serverless versions: + Spark runtime 1.1 LTS + Spark runtime 2.0 + Spark runtime 2.1 + +*Some hardware may have a minimum driver version greater than R470. Check the GPU spec sheet +for your hardware's minimum driver version. + +*For Cloudera and EMR support, please refer to the +[Distributions](https://docs.nvidia.com/spark-rapids/user-guide/latest/faq.html#which-distributions-are-supported) section of the FAQ. + ### RAPIDS Accelerator's Support Policy for Apache Spark The RAPIDS Accelerator maintains support for Apache Spark versions available for download from [Apache Spark](https://spark.apache.org/downloads.html) @@ -74,7 +206,7 @@ The plugin is tested on the following architectures: Supported Spark versions: Apache Spark 3.2.0, 3.2.1, 3.2.2, 3.2.3, 3.2.4 Apache Spark 3.3.0, 3.3.1, 3.3.2, 3.3.3 - Apache Spark 3.4.0, 3.4.1 + Apache Spark 3.4.0, 3.4.1, 3.4.2 Apache Spark 3.5.0 Supported Databricks runtime versions for Azure and AWS: diff --git a/docs/archives/CHANGELOG_24.02.md b/docs/archives/CHANGELOG_24.02.md new file mode 100644 index 00000000000..732035502f0 --- /dev/null +++ b/docs/archives/CHANGELOG_24.02.md @@ -0,0 +1,300 @@ +# Change log\nGenerated on 2024-06-05 +## Release 24.02 + +### Features +||| +|:---|:---| +|[#9926](https://github.com/NVIDIA/spark-rapids/issues/9926)|[FEA] Add config option for the parquet reader input read limit.| +|[#10270](https://github.com/NVIDIA/spark-rapids/issues/10270)|[FEA] Add support for single quotes when reading JSON| +|[#10253](https://github.com/NVIDIA/spark-rapids/issues/10253)|[FEA] Enable mixed types as string in GpuJsonToStruct| +|[#9692](https://github.com/NVIDIA/spark-rapids/issues/9692)|[FEA] Remove Pascal support| +|[#8806](https://github.com/NVIDIA/spark-rapids/issues/8806)|[FEA] Support lazy quantifier and specified group index in regexp_extract function| +|[#10079](https://github.com/NVIDIA/spark-rapids/issues/10079)|[FEA] Add string parameter support for `unix_timestamp` for non-UTC time zones| +|[#9667](https://github.com/NVIDIA/spark-rapids/issues/9667)|[FEA][JSON] Add support for non default `dateFormat` in `from_json`| +|[#9173](https://github.com/NVIDIA/spark-rapids/issues/9173)|[FEA] Support format_number | +|[#10145](https://github.com/NVIDIA/spark-rapids/issues/10145)|[FEA] Support to_utc_timestamp| +|[#9927](https://github.com/NVIDIA/spark-rapids/issues/9927)|[FEA] Support to_date with non-UTC timezones without DST| +|[#10006](https://github.com/NVIDIA/spark-rapids/issues/10006)|[FEA] Support ```ParseToTimestamp``` for non-UTC time 
zones| +|[#9096](https://github.com/NVIDIA/spark-rapids/issues/9096)|[FEA] Add Spark 3.3.4 support| +|[#9585](https://github.com/NVIDIA/spark-rapids/issues/9585)|[FEA] support ascii function| +|[#9260](https://github.com/NVIDIA/spark-rapids/issues/9260)|[FEA] Create Spark 3.4.2 shim and build env| +|[#10076](https://github.com/NVIDIA/spark-rapids/issues/10076)|[FEA] Add performance test framework for non-UTC time zone features.| +|[#9881](https://github.com/NVIDIA/spark-rapids/issues/9881)|[TASK] Remove `spark.rapids.sql.nonUTC.enabled` configuration option| +|[#9801](https://github.com/NVIDIA/spark-rapids/issues/9801)|[FEA] Support DateFormat on GPU with a non-UTC timezone| +|[#6834](https://github.com/NVIDIA/spark-rapids/issues/6834)|[FEA] Support GpuHour expression for timezones other than UTC| +|[#6842](https://github.com/NVIDIA/spark-rapids/issues/6842)|[FEA] Support TimeZone aware operations for value extraction| +|[#1860](https://github.com/NVIDIA/spark-rapids/issues/1860)|[FEA] Optimize row based window operations for BOUNDED ranges| +|[#9606](https://github.com/NVIDIA/spark-rapids/issues/9606)|[FEA] Support unix_timestamp with CST(China Time Zone) support| +|[#9815](https://github.com/NVIDIA/spark-rapids/issues/9815)|[FEA] Support ```unix_timestamp``` for non-DST timezones| +|[#8807](https://github.com/NVIDIA/spark-rapids/issues/8807)|[FEA] support ‘yyyyMMdd’ format in from_unixtime function| +|[#9605](https://github.com/NVIDIA/spark-rapids/issues/9605)|[FEA] Support from_unixtime with CST(China Time Zone) support| +|[#6836](https://github.com/NVIDIA/spark-rapids/issues/6836)|[FEA] Support FromUnixTime for non UTC timezones| +|[#9175](https://github.com/NVIDIA/spark-rapids/issues/9175)|[FEA] Support Databricks 13.3| +|[#6881](https://github.com/NVIDIA/spark-rapids/issues/6881)|[FEA] Support RAPIDS Spark plugin on ARM| +|[#9274](https://github.com/NVIDIA/spark-rapids/issues/9274)|[FEA] Regular deploy process to include arm artifacts| +|[#9844](https://github.com/NVIDIA/spark-rapids/issues/9844)|[FEA] Let Gpu arrow python runners support writing one batch one time for the single threaded model.| +|[#7309](https://github.com/NVIDIA/spark-rapids/issues/7309)|[FEA] Detect multiple versions of the RAPIDS jar on the classpath at the same time| + +### Performance +||| +|:---|:---| +|[#9442](https://github.com/NVIDIA/spark-rapids/issues/9442)|[FEA] For hash joins where the build side can change use the smaller table for the build side| +|[#10142](https://github.com/NVIDIA/spark-rapids/issues/10142)|[TASK] Benchmark existing timestamp functions that work in non-UTC time zone (non-DST)| + +### Bugs Fixed +||| +|:---|:---| +|[#10548](https://github.com/NVIDIA/spark-rapids/issues/10548)|[BUG] test_dpp_bypass / test_dpp_via_aggregate_subquery failures in CI Databricks 13.3| +|[#10530](https://github.com/NVIDIA/spark-rapids/issues/10530)|test_delta_merge_match_delete_only java.lang.OutOfMemoryError: GC overhead limit exceeded| +|[#10464](https://github.com/NVIDIA/spark-rapids/issues/10464)|[BUG] spark334 and spark342 shims missed in scala2.13 dist jar| +|[#10473](https://github.com/NVIDIA/spark-rapids/issues/10473)|[BUG] Leak when running RANK query| +|[#10432](https://github.com/NVIDIA/spark-rapids/issues/10432)|Plug-in Build Failing for Databricks 11.3 | +|[#9974](https://github.com/NVIDIA/spark-rapids/issues/9974)|[BUG] host memory Leak in MultiFileCoalescingPartitionReaderBase in UTC time zone| +|[#10359](https://github.com/NVIDIA/spark-rapids/issues/10359)|[BUG] Build failure on Databricks 
nightly run with `GpuMapInPandasExecMeta`| +|[#10327](https://github.com/NVIDIA/spark-rapids/issues/10327)|[BUG] Unit test FAILED against : SPARK-24957: average with decimal followed by aggregation returning wrong result | +|[#10324](https://github.com/NVIDIA/spark-rapids/issues/10324)|[BUG] hash_aggregate_test.py test FAILED: Type conversion is not allowed from Table {...}| +|[#10291](https://github.com/NVIDIA/spark-rapids/issues/10291)|[BUG] SIGSEGV in libucp.so| +|[#9212](https://github.com/NVIDIA/spark-rapids/issues/9212)|[BUG] `from_json` fails with cuDF error `Invalid list size computation error`| +|[#10264](https://github.com/NVIDIA/spark-rapids/issues/10264)|[BUG] hash aggregate test failures due to type conversion errors| +|[#10262](https://github.com/NVIDIA/spark-rapids/issues/10262)|[BUG] Test "SPARK-24957: average with decimal followed by aggregation returning wrong result" failed.| +|[#9353](https://github.com/NVIDIA/spark-rapids/issues/9353)|[BUG] [JSON] A mix of lists and structs within the same column is not supported| +|[#10099](https://github.com/NVIDIA/spark-rapids/issues/10099)|[BUG] orc_test.py::test_orc_scan_with_aggregate_pushdown fails with a standalone cluster on spark 3.3.0| +|[#10047](https://github.com/NVIDIA/spark-rapids/issues/10047)|[BUG] CudfException during conditional hash join while running nds query64| +|[#9779](https://github.com/NVIDIA/spark-rapids/issues/9779)|[BUG] 330cdh failed test_hash_reduction_sum_full_decimal on CI| +|[#10197](https://github.com/NVIDIA/spark-rapids/issues/10197)|[BUG] Disable GetJsonObject by default and update docs| +|[#10165](https://github.com/NVIDIA/spark-rapids/issues/10165)|[BUG] Databricks 13.3 executor side broadcast failure| +|[#10224](https://github.com/NVIDIA/spark-rapids/issues/10224)|[BUG] DBR builds fails when installing Maven| +|[#10222](https://github.com/NVIDIA/spark-rapids/issues/10222)|[BUG] to_utc_timestamp and from_utc_timestamp fallback when TZ is supported time zone| +|[#10195](https://github.com/NVIDIA/spark-rapids/issues/10195)|[BUG] test_window_aggs_for_negative_rows_partitioned failure in CI| +|[#10182](https://github.com/NVIDIA/spark-rapids/issues/10182)|[BUG] test_dpp_bypass / test_dpp_via_aggregate_subquery failures in CI (databricks)| +|[#10169](https://github.com/NVIDIA/spark-rapids/issues/10169)|[BUG] Host column vector leaks when running `test_cast_timestamp_to_date`| +|[#10050](https://github.com/NVIDIA/spark-rapids/issues/10050)|[BUG] test_cast_decimal_to_decimal[to:DecimalType(1,-1)-from:Decimal(5,-3)] fails with DATAGEN_SEED=1702439569| +|[#10088](https://github.com/NVIDIA/spark-rapids/issues/10088)|[BUG] GpuExplode single row split to fit cuDF limits| +|[#10174](https://github.com/NVIDIA/spark-rapids/issues/10174)|[BUG] json_test.py::test_from_json_struct_timestamp failed on: Part of the plan is not columnar | +|[#10186](https://github.com/NVIDIA/spark-rapids/issues/10186)|[BUG] test_to_date_with_window_functions failed in non-UTC nightly CI| +|[#10154](https://github.com/NVIDIA/spark-rapids/issues/10154)|[BUG] 'spark-test.sh' integration tests FAILED on 'ps: command not found" in Rocky Docker environment| +|[#10175](https://github.com/NVIDIA/spark-rapids/issues/10175)|[BUG] string_test.py::test_format_number_float_special FAILED : AssertionError 'NaN' == | +|[#10166](https://github.com/NVIDIA/spark-rapids/issues/10166)|Detect Undeclared Shim in POM.xml| +|[#10170](https://github.com/NVIDIA/spark-rapids/issues/10170)|[BUG] `test_cast_timestamp_to_date` fails with `TZ=Asia/Hebron`| 
+|[#10149](https://github.com/NVIDIA/spark-rapids/issues/10149)|[BUG] GPU illegal access detected during delta_byte_array.parquet read| +|[#9905](https://github.com/NVIDIA/spark-rapids/issues/9905)|[BUG] GpuJsonScan incorrect behavior when parsing dates| +|[#10163](https://github.com/NVIDIA/spark-rapids/issues/10163)|Spark 3.3.4 Shim Build Failure| +|[#10105](https://github.com/NVIDIA/spark-rapids/issues/10105)|[BUG] scala:compile is not thread safe unless compiler bridge already exists | +|[#10026](https://github.com/NVIDIA/spark-rapids/issues/10026)|[BUG] test_hash_agg_with_nan_keys failed with a DATAGEN_SEED=1702335559| +|[#10075](https://github.com/NVIDIA/spark-rapids/issues/10075)|[BUG] `non-pinned blocking alloc with spill` unit test failed in HostAllocSuite| +|[#10134](https://github.com/NVIDIA/spark-rapids/issues/10134)|[BUG] test_window_aggs_for_batched_finite_row_windows_partitioned failed on Scala 2.13 with DATAGEN_SEED=1704033145| +|[#10118](https://github.com/NVIDIA/spark-rapids/issues/10118)|[BUG] non-UTC Nightly CI failed| +|[#10136](https://github.com/NVIDIA/spark-rapids/issues/10136)|[BUG] The canonicalized version of `GpuFileSourceScanExec`s that suppose to be semantic-equal can be different | +|[#10110](https://github.com/NVIDIA/spark-rapids/issues/10110)|[BUG] disable collect_list and collect_set for window operations by default.| +|[#10129](https://github.com/NVIDIA/spark-rapids/issues/10129)|[BUG] Unit test suite fails with `Null data pointer` in GpuTimeZoneDB| +|[#10089](https://github.com/NVIDIA/spark-rapids/issues/10089)|[BUG] DATAGEN_SEED= environment does not override the marker datagen_overrides| +|[#10108](https://github.com/NVIDIA/spark-rapids/issues/10108)|[BUG] @datagen_overrides seed is sticky when it shouldn't be| +|[#10064](https://github.com/NVIDIA/spark-rapids/issues/10064)|[BUG] test_unsupported_fallback_regexp_replace failed with DATAGEN_SEED=1702662063| +|[#10117](https://github.com/NVIDIA/spark-rapids/issues/10117)|[BUG] test_from_utc_timestamp failed on Cloudera Env when TZ is Iran| +|[#9914](https://github.com/NVIDIA/spark-rapids/issues/9914)|[BUG] Report GPU OOM on recent passed CI premerges.| +|[#10094](https://github.com/NVIDIA/spark-rapids/issues/10094)|[BUG] spark351 PR check failure MockTaskContext method isFailed in class TaskContext of type ()Boolean is not defined| +|[#10017](https://github.com/NVIDIA/spark-rapids/issues/10017)|[BUG] test_casting_from_double_to_timestamp failed for DATAGEN_SEED=1702329497| +|[#9992](https://github.com/NVIDIA/spark-rapids/issues/9992)|[BUG] conditionals_test.py::test_conditional_with_side_effects_cast[String] failed with DATAGEN_SEED=1701976979| +|[#9743](https://github.com/NVIDIA/spark-rapids/issues/9743)|[BUG][AUDIT] SPARK-45652 - SPJ: Handle empty input partitions after dynamic filtering| +|[#9859](https://github.com/NVIDIA/spark-rapids/issues/9859)|[AUDIT] [SPARK-45786] Inaccurate Decimal multiplication and division results| +|[#9555](https://github.com/NVIDIA/spark-rapids/issues/9555)|[BUG] Scala 2.13 build with JDK 11 or 17 fails OpcodeSuite tests| +|[#10073](https://github.com/NVIDIA/spark-rapids/issues/10073)|[BUG] test_csv_prefer_date_with_infer_schema failed with DATAGEN_SEED=1702847907| +|[#10004](https://github.com/NVIDIA/spark-rapids/issues/10004)|[BUG] If a host memory buffer is spilled, it cannot be unspilled| +|[#10063](https://github.com/NVIDIA/spark-rapids/issues/10063)|[BUG] CI build failure with 341db: method getKillReason has weaker access privileges; it should be public| 
+|[#10055](https://github.com/NVIDIA/spark-rapids/issues/10055)|[BUG] array_test.py::test_array_transform_non_deterministic failed with non-UTC time zone| +|[#10056](https://github.com/NVIDIA/spark-rapids/issues/10056)|[BUG] Unit tests ToPrettyStringSuite FAILED on spark-3.5.0| +|[#10048](https://github.com/NVIDIA/spark-rapids/issues/10048)|[BUG] Fix ```out of range``` error from ```pySpark``` in ```test_timestamp_millis``` and other two integration test cases| +|[#4204](https://github.com/NVIDIA/spark-rapids/issues/4204)|casting double to string does not match Spark| +|[#9938](https://github.com/NVIDIA/spark-rapids/issues/9938)|Better to do some refactor for the Python UDF code| +|[#10018](https://github.com/NVIDIA/spark-rapids/issues/10018)|[BUG] `GpuToUnixTimestampImproved` off by 1 on GPU when handling timestamp before epoch| +|[#10012](https://github.com/NVIDIA/spark-rapids/issues/10012)|[BUG] test_str_to_map_expr_random_delimiters with DATAGEN_SEED=1702166057 hangs| +|[#10029](https://github.com/NVIDIA/spark-rapids/issues/10029)|[BUG] doc links fail with 404 for shims.md| +|[#9472](https://github.com/NVIDIA/spark-rapids/issues/9472)|[BUG] Non-Deterministic expressions in an array_transform can cause errors| +|[#9884](https://github.com/NVIDIA/spark-rapids/issues/9884)|[BUG] delta_lake_delete_test.py failed assertion [DATAGEN_SEED=1701225104, IGNORE_ORDER...| +|[#9977](https://github.com/NVIDIA/spark-rapids/issues/9977)|[BUG] test_cast_date_integral fails on databricks 3.4.1| +|[#9936](https://github.com/NVIDIA/spark-rapids/issues/9936)|[BUG] Nightly CI of non-UTC time zone reports 'year 0 is out of range' error| +|[#9941](https://github.com/NVIDIA/spark-rapids/issues/9941)|[BUG] A potential data corruption in Pandas UDFs| +|[#9897](https://github.com/NVIDIA/spark-rapids/issues/9897)|[BUG] Error message for multiple jars on classpath is wrong| +|[#9916](https://github.com/NVIDIA/spark-rapids/issues/9916)|[BUG] ```test_cast_string_ts_valid_format``` failed at ```seed = 1701362564```| +|[#9559](https://github.com/NVIDIA/spark-rapids/issues/9559)|[BUG] precommit regularly fails with error trying to download a dependency| +|[#9708](https://github.com/NVIDIA/spark-rapids/issues/9708)|[BUG] test_cast_string_ts_valid_format fails with DATAGEN_SEED=1699978422| + +### PRs +||| +|:---|:---| +|[#10555](https://github.com/NVIDIA/spark-rapids/pull/10555)|Update change log [skip ci]| +|[#10551](https://github.com/NVIDIA/spark-rapids/pull/10551)|Try to make degenerative joins here impossible for these tests| +|[#10546](https://github.com/NVIDIA/spark-rapids/pull/10546)|Update changelog [skip ci]| +|[#10541](https://github.com/NVIDIA/spark-rapids/pull/10541)|Fix Delta log cache size settings during integration tests| +|[#10525](https://github.com/NVIDIA/spark-rapids/pull/10525)|Update changelog for v24.02.0 release [skip ci]| +|[#10465](https://github.com/NVIDIA/spark-rapids/pull/10465)|Add missed shims for scala2.13| +|[#10511](https://github.com/NVIDIA/spark-rapids/pull/10511)|Update rapids jni and private dependency version to 24.02.1| +|[#10513](https://github.com/NVIDIA/spark-rapids/pull/10513)|Fix scalar leak in SumBinaryFixer (#10510)| +|[#10475](https://github.com/NVIDIA/spark-rapids/pull/10475)|Fix scalar leak in RankFixer| +|[#10461](https://github.com/NVIDIA/spark-rapids/pull/10461)|Preserve tags on FileSourceScanExec| +|[#10459](https://github.com/NVIDIA/spark-rapids/pull/10459)|[DOC] Fix table rendering issue in github.io download UI page on branch-24.02 [skip ci] | 
+|[#10443](https://github.com/NVIDIA/spark-rapids/pull/10443)|Update change log for v24.02.0 release [skip ci]| +|[#10439](https://github.com/NVIDIA/spark-rapids/pull/10439)|Reverts NVIDIA/spark-rapids#10232 and fixes the plugin build on Databricks 11.3| +|[#10380](https://github.com/NVIDIA/spark-rapids/pull/10380)|Init changelog 24.02 [skip ci]| +|[#10367](https://github.com/NVIDIA/spark-rapids/pull/10367)|Update rapids JNI and private version to release 24.02.0| +|[#10414](https://github.com/NVIDIA/spark-rapids/pull/10414)|[DOC] Fix 24.02.0 documentation errors [skip ci]| +|[#10403](https://github.com/NVIDIA/spark-rapids/pull/10403)|Cherry-pick: Fix a memory leak in json tuple (#10360)| +|[#10387](https://github.com/NVIDIA/spark-rapids/pull/10387)|[DOC] Update docs for 24.02.0 release [skip ci]| +|[#10399](https://github.com/NVIDIA/spark-rapids/pull/10399)|Update NOTICE-binary| +|[#10389](https://github.com/NVIDIA/spark-rapids/pull/10389)|Change version and branch to 24.02 in docs [skip ci]| +|[#10384](https://github.com/NVIDIA/spark-rapids/pull/10384)|[DOC] Update docs for 23.12.2 release [skip ci] | +|[#10309](https://github.com/NVIDIA/spark-rapids/pull/10309)|[DOC] add custom 404 page and fix some document issue [skip ci]| +|[#10352](https://github.com/NVIDIA/spark-rapids/pull/10352)|xfail mixed type test| +|[#10355](https://github.com/NVIDIA/spark-rapids/pull/10355)|Revert "Support barrier mode for mapInPandas/mapInArrow (#10343)"| +|[#10353](https://github.com/NVIDIA/spark-rapids/pull/10353)|Use fixed seed for test_from_json_struct_decimal| +|[#10343](https://github.com/NVIDIA/spark-rapids/pull/10343)|Support barrier mode for mapInPandas/mapInArrow| +|[#10345](https://github.com/NVIDIA/spark-rapids/pull/10345)|Fix auto merge conflict 10339 [skip ci]| +|[#9991](https://github.com/NVIDIA/spark-rapids/pull/9991)|Start to use explicit memory limits in the parquet chunked reader| +|[#10328](https://github.com/NVIDIA/spark-rapids/pull/10328)|Fix typo in spark-tests.sh [skip ci]| +|[#10279](https://github.com/NVIDIA/spark-rapids/pull/10279)|Run '--packages' only with default cuda11 jar| +|[#10273](https://github.com/NVIDIA/spark-rapids/pull/10273)|Support reading JSON data with single quotes around attribute names and values| +|[#10306](https://github.com/NVIDIA/spark-rapids/pull/10306)|Fix performance regression in from_json| +|[#10272](https://github.com/NVIDIA/spark-rapids/pull/10272)|Add FullOuter support to GpuShuffledSymmetricHashJoinExec| +|[#10260](https://github.com/NVIDIA/spark-rapids/pull/10260)|Add perf test for time zone operators| +|[#10275](https://github.com/NVIDIA/spark-rapids/pull/10275)|Add tests for window Python udf with array input| +|[#10278](https://github.com/NVIDIA/spark-rapids/pull/10278)|Clean up $M2_CACHE to avoid side-effect of previous dependency:get [skip ci]| +|[#10268](https://github.com/NVIDIA/spark-rapids/pull/10268)|Add config to enable mixed types as string in GpuJsonToStruct & GpuJsonScan| +|[#10297](https://github.com/NVIDIA/spark-rapids/pull/10297)|Revert "UCX 1.16.0 upgrade (#10190)"| +|[#10289](https://github.com/NVIDIA/spark-rapids/pull/10289)|Add gerashegalov to CODEOWNERS [skip ci]| +|[#10290](https://github.com/NVIDIA/spark-rapids/pull/10290)|Fix merge conflict with 23.12 [skip ci]| +|[#10190](https://github.com/NVIDIA/spark-rapids/pull/10190)|UCX 1.16.0 upgrade| +|[#10211](https://github.com/NVIDIA/spark-rapids/pull/10211)|Use parse_url kernel for QUERY literal and column key| 
+|[#10267](https://github.com/NVIDIA/spark-rapids/pull/10267)|Update to libcudf unsigned sum aggregation types change| +|[#10208](https://github.com/NVIDIA/spark-rapids/pull/10208)|Added Support for Lazy Quantifier| +|[#9993](https://github.com/NVIDIA/spark-rapids/pull/9993)|Enable mixed types as string in GpuJsonScan| +|[#10246](https://github.com/NVIDIA/spark-rapids/pull/10246)|Refactor full join iterator to allow access to build tracker| +|[#10257](https://github.com/NVIDIA/spark-rapids/pull/10257)|Enable auto-merge from branch-24.02 to branch-24.04 [skip CI]| +|[#10178](https://github.com/NVIDIA/spark-rapids/pull/10178)|Mark hash reduction decimal overflow test as a permanent seed override| +|[#10244](https://github.com/NVIDIA/spark-rapids/pull/10244)|Use POSIX mode in assembly plugin to avoid issues with large UID/GID| +|[#10238](https://github.com/NVIDIA/spark-rapids/pull/10238)|Smoke test with '--package' to fetch the plugin jar| +|[#10201](https://github.com/NVIDIA/spark-rapids/pull/10201)|Deploy release candidates to local maven repo for dependency check[skip ci]| +|[#10240](https://github.com/NVIDIA/spark-rapids/pull/10240)|Improved inner joins with large build side| +|[#10220](https://github.com/NVIDIA/spark-rapids/pull/10220)|Disable GetJsonObject by default and add tests for as many issues with it as possible| +|[#10230](https://github.com/NVIDIA/spark-rapids/pull/10230)|Fix Databricks 13.3 BroadcastHashJoin using executor side broadcast fed by ColumnarToRow [Databricks]| +|[#10232](https://github.com/NVIDIA/spark-rapids/pull/10232)|Fixed 330db Shims to Adopt the PythonRunner Changes| +|[#10225](https://github.com/NVIDIA/spark-rapids/pull/10225)|Download Maven from apache.org archives [skip ci]| +|[#10210](https://github.com/NVIDIA/spark-rapids/pull/10210)|Add string parameter support for unix_timestamp for non-UTC time zones| +|[#10223](https://github.com/NVIDIA/spark-rapids/pull/10223)|Fix to_utc_timestamp and from_utc_timestamp fallback when TZ is supported time zone| +|[#10205](https://github.com/NVIDIA/spark-rapids/pull/10205)|Deterministic ordering in window tests| +|[#10204](https://github.com/NVIDIA/spark-rapids/pull/10204)|Further prevent degenerative joins in dpp_test| +|[#10156](https://github.com/NVIDIA/spark-rapids/pull/10156)|Update string to float compatibility doc[skip ci]| +|[#10193](https://github.com/NVIDIA/spark-rapids/pull/10193)|Fix explode with carry-along columns on GpuExplode single row retry handling| +|[#10191](https://github.com/NVIDIA/spark-rapids/pull/10191)|Updating the config documentation for filecache configs [skip ci]| +|[#10131](https://github.com/NVIDIA/spark-rapids/pull/10131)|With a single row GpuExplode tries to split the generator array| +|[#10179](https://github.com/NVIDIA/spark-rapids/pull/10179)|Fix build regression against Spark 3.2.x| +|[#10189](https://github.com/NVIDIA/spark-rapids/pull/10189)|test needs marks for non-UTC and for non_supported timezones| +|[#10176](https://github.com/NVIDIA/spark-rapids/pull/10176)|Fix format_number NaN symbol in high jdk version| +|[#10074](https://github.com/NVIDIA/spark-rapids/pull/10074)|Update the legacy mode check: only take effect when reading date/timestamp column| +|[#10167](https://github.com/NVIDIA/spark-rapids/pull/10167)|Defined Shims Should Be Declared In POM | +|[#10168](https://github.com/NVIDIA/spark-rapids/pull/10168)|Prevent a degenerative join in test_dpp_reuse_broadcast_exchange| +|[#10171](https://github.com/NVIDIA/spark-rapids/pull/10171)|Fix `test_cast_timestamp_to_date` 
when running in a DST time zone| +|[#9975](https://github.com/NVIDIA/spark-rapids/pull/9975)|Improve dateFormat support in GpuJsonScan and make tests consistent with GpuStructsToJson| +|[#9790](https://github.com/NVIDIA/spark-rapids/pull/9790)|Support float case of format_number with format_float kernel| +|[#10144](https://github.com/NVIDIA/spark-rapids/pull/10144)|Support to_utc_timestamp| +|[#10162](https://github.com/NVIDIA/spark-rapids/pull/10162)|Fix Spark 334 Build| +|[#10146](https://github.com/NVIDIA/spark-rapids/pull/10146)|Refactor the window code so it is not mostly kept in a few very large files| +|[#10155](https://github.com/NVIDIA/spark-rapids/pull/10155)|Install procps tools for rocky docker images [skip ci]| +|[#10153](https://github.com/NVIDIA/spark-rapids/pull/10153)|Disable multi-threaded Maven | +|[#10100](https://github.com/NVIDIA/spark-rapids/pull/10100)|Enable to_date (via gettimestamp and casting timestamp to date) for non-UTC time zones| +|[#10140](https://github.com/NVIDIA/spark-rapids/pull/10140)|Removed Unnecessary Whitespaces From Spark 3.3.4 Shim [skip ci]| +|[#10148](https://github.com/NVIDIA/spark-rapids/pull/10148)|fix test_hash_agg_with_nan_keys floating point sum failure| +|[#10150](https://github.com/NVIDIA/spark-rapids/pull/10150)|Increase timeouts in HostAllocSuite to avoid timeout failures on slow machines| +|[#10143](https://github.com/NVIDIA/spark-rapids/pull/10143)|Fix `test_window_aggs_for_batched_finite_row_windows_partitioned` fail| +|[#9887](https://github.com/NVIDIA/spark-rapids/pull/9887)|Reduce time-consuming of pre-merge| +|[#10130](https://github.com/NVIDIA/spark-rapids/pull/10130)|Change unit tests that force ooms to specify the oom type (gpu|cpu)| +|[#10138](https://github.com/NVIDIA/spark-rapids/pull/10138)|Update copyright dates in NOTICE files [skip ci]| +|[#10139](https://github.com/NVIDIA/spark-rapids/pull/10139)|Add Delta Lake 2.3.0 to list of versions to test for Spark 3.3.x| +|[#10135](https://github.com/NVIDIA/spark-rapids/pull/10135)|Fix CI: can't find script when there is pushd in script [skip ci]| +|[#10137](https://github.com/NVIDIA/spark-rapids/pull/10137)|Fix the canonicalizing for GPU file scan| +|[#10132](https://github.com/NVIDIA/spark-rapids/pull/10132)|Disable collect_list and collect_set for window by default| +|[#10084](https://github.com/NVIDIA/spark-rapids/pull/10084)|Refactor GpuJsonToStruct to reduce code duplication and manage resources more efficiently| +|[#10087](https://github.com/NVIDIA/spark-rapids/pull/10087)|Additional unit tests for GeneratedInternalRowToCudfRowIterator| +|[#10082](https://github.com/NVIDIA/spark-rapids/pull/10082)|Add Spark 3.3.4 Shim| +|[#10054](https://github.com/NVIDIA/spark-rapids/pull/10054)|Support Ascii function for ascii and latin-1| +|[#10127](https://github.com/NVIDIA/spark-rapids/pull/10127)|Fix merge conflict with branch-23.12| +|[#10097](https://github.com/NVIDIA/spark-rapids/pull/10097)|[DOC] Update docs for 23.12.1 release [skip ci]| +|[#10109](https://github.com/NVIDIA/spark-rapids/pull/10109)|Fixes a bug where datagen seed overrides were sticky and adds datagen_seed_override_disabled| +|[#10093](https://github.com/NVIDIA/spark-rapids/pull/10093)|Fix test_unsupported_fallback_regexp_replace| +|[#10119](https://github.com/NVIDIA/spark-rapids/pull/10119)|Fix from_utc_timestamp case failure on Cloudera when TZ is Iran| +|[#10106](https://github.com/NVIDIA/spark-rapids/pull/10106)|Add `isFailed()` to MockTaskContext and Remove MockTaskContextBase.scala| 
+|[#10112](https://github.com/NVIDIA/spark-rapids/pull/10112)|Remove datagen seed override for test_conditional_with_side_effects_cast| +|[#10104](https://github.com/NVIDIA/spark-rapids/pull/10104)|[DOC] Add in docs about memory debugging [skip ci]| +|[#9925](https://github.com/NVIDIA/spark-rapids/pull/9925)|Use threads, cache Scala compiler in GH mvn workflow| +|[#9967](https://github.com/NVIDIA/spark-rapids/pull/9967)|Added Spark-3.4.2 Shims| +|[#10061](https://github.com/NVIDIA/spark-rapids/pull/10061)|Use parse_url kernel for QUERY parsing| +|[#10101](https://github.com/NVIDIA/spark-rapids/pull/10101)|[DOC] Add column order error docs [skip ci]| +|[#10078](https://github.com/NVIDIA/spark-rapids/pull/10078)|Add perf test for non-UTC operators| +|[#10096](https://github.com/NVIDIA/spark-rapids/pull/10096)|Shim MockTaskContext to fix Spark 3.5.1 build| +|[#10092](https://github.com/NVIDIA/spark-rapids/pull/10092)|Implement Math.round using floor on GPU| +|[#10085](https://github.com/NVIDIA/spark-rapids/pull/10085)|Update tests that originally restricted the Spark timestamp range| +|[#10090](https://github.com/NVIDIA/spark-rapids/pull/10090)|Replace GPU-unsupported `\z` with an alternative RLIKE expression| +|[#10095](https://github.com/NVIDIA/spark-rapids/pull/10095)|Temporarily fix date format failed cases for non-UTC time zone.| +|[#9999](https://github.com/NVIDIA/spark-rapids/pull/9999)|Add some odd time zones for timezone transition tests| +|[#9962](https://github.com/NVIDIA/spark-rapids/pull/9962)|Add 3.5.1-SNAPSHOT Shim| +|[#10071](https://github.com/NVIDIA/spark-rapids/pull/10071)|Cleanup usage of non-utc configuration here| +|[#10057](https://github.com/NVIDIA/spark-rapids/pull/10057)|Add support for StringConcatFactory.makeConcatWithConstants (#9555)| +|[#9996](https://github.com/NVIDIA/spark-rapids/pull/9996)|Test full timestamp output range in PySpark| +|[#10081](https://github.com/NVIDIA/spark-rapids/pull/10081)|Add a fallback Cloudera Maven repo URL [skip ci]| +|[#10065](https://github.com/NVIDIA/spark-rapids/pull/10065)|Improve host memory spill interfaces| +|[#10069](https://github.com/NVIDIA/spark-rapids/pull/10069)|Revert "Support split broadcast join condition into ast and non-ast […| +|[#10070](https://github.com/NVIDIA/spark-rapids/pull/10070)|Fix 332db build failure| +|[#10060](https://github.com/NVIDIA/spark-rapids/pull/10060)|Fix failed cases for non-utc time zone| +|[#10038](https://github.com/NVIDIA/spark-rapids/pull/10038)|Remove spark.rapids.sql.nonUTC.enabled configuration option| +|[#10059](https://github.com/NVIDIA/spark-rapids/pull/10059)|Fixed Failing ToPrettyStringSuite Test for 3.5.0| +|[#10013](https://github.com/NVIDIA/spark-rapids/pull/10013)|Extended configuration of OOM injection mode| +|[#10052](https://github.com/NVIDIA/spark-rapids/pull/10052)|Set seed=0 for some integration test cases| +|[#10053](https://github.com/NVIDIA/spark-rapids/pull/10053)|Remove invalid user from CODEOWNER file [skip ci]| +|[#10049](https://github.com/NVIDIA/spark-rapids/pull/10049)|Fix out of range error from pySpark in test_timestamp_millis and other two integration test cases| +|[#9721](https://github.com/NVIDIA/spark-rapids/pull/9721)|Support date_format via Gpu for non-UTC time zone| +|[#9470](https://github.com/NVIDIA/spark-rapids/pull/9470)|Use float to string kernel| +|[#9845](https://github.com/NVIDIA/spark-rapids/pull/9845)|Use parse_url kernel for HOST parsing| +|[#10024](https://github.com/NVIDIA/spark-rapids/pull/10024)|Support hour minute second for non-UTC 
time zone| +|[#9973](https://github.com/NVIDIA/spark-rapids/pull/9973)|Batching support for row-based bounded window functions | +|[#10042](https://github.com/NVIDIA/spark-rapids/pull/10042)|Update tests to not have hard coded fallback when not needed| +|[#9816](https://github.com/NVIDIA/spark-rapids/pull/9816)|Support unix_timestamp and to_unix_timestamp with non-UTC timezones (non-DST)| +|[#9902](https://github.com/NVIDIA/spark-rapids/pull/9902)|Some refactor for the Python UDF code| +|[#10023](https://github.com/NVIDIA/spark-rapids/pull/10023)|GPU supports `yyyyMMdd` format by post process for the `from_unixtime` function| +|[#10033](https://github.com/NVIDIA/spark-rapids/pull/10033)|Remove GpuToTimestampImproved and spark.rapids.sql.improvedTimeOps.enabled| +|[#10016](https://github.com/NVIDIA/spark-rapids/pull/10016)|Fix infinite loop in test_str_to_map_expr_random_delimiters| +|[#9481](https://github.com/NVIDIA/spark-rapids/pull/9481)|Use parse_url kernel for PROTOCOL parsing| +|[#10030](https://github.com/NVIDIA/spark-rapids/pull/10030)|Update links in shims.md| +|[#10015](https://github.com/NVIDIA/spark-rapids/pull/10015)|Fix array_transform to not recompute the argument| +|[#10011](https://github.com/NVIDIA/spark-rapids/pull/10011)|Add cpu oom retry split handling to InternalRowToColumnarBatchIterator| +|[#10019](https://github.com/NVIDIA/spark-rapids/pull/10019)|Fix auto merge conflict 10010 [skip ci]| +|[#9760](https://github.com/NVIDIA/spark-rapids/pull/9760)|Support split broadcast join condition into ast and non-ast| +|[#9827](https://github.com/NVIDIA/spark-rapids/pull/9827)|Enable ORC timestamp and decimal predicate push down tests| +|[#10002](https://github.com/NVIDIA/spark-rapids/pull/10002)|Use Spark 3.3.3 instead of 3.3.2 for Scala 2.13 premerge builds| +|[#10000](https://github.com/NVIDIA/spark-rapids/pull/10000)|Optimize from_unixtime| +|[#10003](https://github.com/NVIDIA/spark-rapids/pull/10003)|Fix merge conflict with branch-23.12| +|[#9984](https://github.com/NVIDIA/spark-rapids/pull/9984)|Fix 340+(including DB341+) does not support casting date to integral/float| +|[#9972](https://github.com/NVIDIA/spark-rapids/pull/9972)|Fix year 0 is out of range in test_from_json_struct_timestamp | +|[#9814](https://github.com/NVIDIA/spark-rapids/pull/9814)|Support from_unixtime via Gpu for non-UTC time zone| +|[#9929](https://github.com/NVIDIA/spark-rapids/pull/9929)|Add host memory retries for GeneratedInternalRowToCudfRowIterator| +|[#9957](https://github.com/NVIDIA/spark-rapids/pull/9957)|Update cases for cast between integral and (date/time)| +|[#9959](https://github.com/NVIDIA/spark-rapids/pull/9959)|Append new authorized user to blossom-ci whitelist [skip ci]| +|[#9942](https://github.com/NVIDIA/spark-rapids/pull/9942)|Fix a potential data corruption for Pandas UDF| +|[#9922](https://github.com/NVIDIA/spark-rapids/pull/9922)|Fix `allowMultipleJars` recommend setting message| +|[#9947](https://github.com/NVIDIA/spark-rapids/pull/9947)|Fix merge conflict with branch-23.12| +|[#9908](https://github.com/NVIDIA/spark-rapids/pull/9908)|Register default allocator for host memory| +|[#9944](https://github.com/NVIDIA/spark-rapids/pull/9944)|Fix Java OOM caused by incorrect state of shouldCapture when exception occurred| +|[#9937](https://github.com/NVIDIA/spark-rapids/pull/9937)|Refactor to use CLASSIFIER instead of CUDA_CLASSIFIER [skip ci]| +|[#9904](https://github.com/NVIDIA/spark-rapids/pull/9904)|Params for build and test CI scripts on Databricks| 
+|[#9719](https://github.com/NVIDIA/spark-rapids/pull/9719)|Support fine grained timezone checker instead of type based| +|[#9918](https://github.com/NVIDIA/spark-rapids/pull/9918)|Prevent generation of 'year 0 is out of range' strings in IT| +|[#9852](https://github.com/NVIDIA/spark-rapids/pull/9852)|Avoid generating duplicate nan keys with MapGen(FloatGen)| +|[#9674](https://github.com/NVIDIA/spark-rapids/pull/9674)|Add cache action to speed up mvn workflow [skip ci]| +|[#9900](https://github.com/NVIDIA/spark-rapids/pull/9900)|Revert "Remove Databricks 13.3 from release 23.12 (#9890)"| +|[#9889](https://github.com/NVIDIA/spark-rapids/pull/9889)|Fix test_cast_string_ts_valid_format test| +|[#9888](https://github.com/NVIDIA/spark-rapids/pull/9888)|Update nightly build and deploy script for arm artifacts [skip ci]| +|[#9833](https://github.com/NVIDIA/spark-rapids/pull/9833)|Fix a hang for Pandas UDFs on DB 13.3| +|[#9656](https://github.com/NVIDIA/spark-rapids/pull/9656)|Update for new retry state machine JNI APIs| +|[#9654](https://github.com/NVIDIA/spark-rapids/pull/9654)|Detect multiple jars on the classpath when init plugin| +|[#9857](https://github.com/NVIDIA/spark-rapids/pull/9857)|Skip redundant steps in nightly build [skip ci]| +|[#9812](https://github.com/NVIDIA/spark-rapids/pull/9812)|Update JNI and private dep version to 24.02.0-SNAPSHOT| +|[#9716](https://github.com/NVIDIA/spark-rapids/pull/9716)|Initiate project version 24.02.0-SNAPSHOT| + diff --git a/docs/download.md b/docs/download.md index a7b6bd23a4a..f786f5a217d 100644 --- a/docs/download.md +++ b/docs/download.md @@ -18,7 +18,7 @@ cuDF jar, that is either preinstalled in the Spark classpath on all nodes or sub that uses the RAPIDS Accelerator For Apache Spark. See the [getting-started guide](https://docs.nvidia.com/spark-rapids/user-guide/latest/getting-started/overview.html) for more details. -## Release v24.04.1 +## Release v24.06.0 ### Hardware Requirements: The plugin is tested on the following architectures: @@ -41,7 +41,7 @@ The plugin is tested on the following architectures: Supported Spark versions: Apache Spark 3.2.0, 3.2.1, 3.2.2, 3.2.3, 3.2.4 Apache Spark 3.3.0, 3.3.1, 3.3.2, 3.3.3, 3.3.4 - Apache Spark 3.4.0, 3.4.1, 3.4.2 + Apache Spark 3.4.0, 3.4.1, 3.4.2, 3.4.3 Apache Spark 3.5.0, 3.5.1 Supported Databricks runtime versions for Azure and AWS: @@ -57,6 +57,7 @@ The plugin is tested on the following architectures: Spark runtime 1.1 LTS Spark runtime 2.0 Spark runtime 2.1 + Spark runtime 2.2 *Some hardware may have a minimum driver version greater than R470. Check the GPU spec sheet for your hardware's minimum driver version. @@ -67,14 +68,14 @@ for your hardware's minimum driver version. 
### RAPIDS Accelerator's Support Policy for Apache Spark The RAPIDS Accelerator maintains support for Apache Spark versions available for download from [Apache Spark](https://spark.apache.org/downloads.html) -### Download RAPIDS Accelerator for Apache Spark v24.04.1 +### Download RAPIDS Accelerator for Apache Spark v24.06.0 | Processor | Scala Version | Download Jar | Download Signature | |-----------|---------------|--------------|--------------------| -| x86_64 | Scala 2.12 | [RAPIDS Accelerator v24.04.1](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.04.1/rapids-4-spark_2.12-24.04.1.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.04.1/rapids-4-spark_2.12-24.04.1.jar.asc) | -| x86_64 | Scala 2.13 | [RAPIDS Accelerator v24.04.1](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/24.04.1/rapids-4-spark_2.13-24.04.1.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/24.04.1/rapids-4-spark_2.13-24.04.1.jar.asc) | -| arm64 | Scala 2.12 | [RAPIDS Accelerator v24.04.1](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.04.1/rapids-4-spark_2.12-24.04.1-cuda11-arm64.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.04.1/rapids-4-spark_2.12-24.04.1-cuda11-arm64.jar.asc) | -| arm64 | Scala 2.13 | [RAPIDS Accelerator v24.04.1](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/24.04.1/rapids-4-spark_2.13-24.04.1-cuda11-arm64.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/24.04.1/rapids-4-spark_2.13-24.04.1-cuda11-arm64.jar.asc) | +| x86_64 | Scala 2.12 | [RAPIDS Accelerator v24.06.0](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.06.0/rapids-4-spark_2.12-24.06.0.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.06.0/rapids-4-spark_2.12-24.06.0.jar.asc) | +| x86_64 | Scala 2.13 | [RAPIDS Accelerator v24.06.0](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/24.06.0/rapids-4-spark_2.13-24.06.0.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/24.06.0/rapids-4-spark_2.13-24.06.0.jar.asc) | +| arm64 | Scala 2.12 | [RAPIDS Accelerator v24.06.0](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.06.0/rapids-4-spark_2.12-24.06.0-cuda11-arm64.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/24.06.0/rapids-4-spark_2.12-24.06.0-cuda11-arm64.jar.asc) | +| arm64 | Scala 2.13 | [RAPIDS Accelerator v24.06.0](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/24.06.0/rapids-4-spark_2.13-24.06.0-cuda11-arm64.jar) | [Signature](https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.13/24.06.0/rapids-4-spark_2.13-24.06.0-cuda11-arm64.jar.asc) | This package is built against CUDA 11.8. It is tested on V100, T4, A10, A100, L4 and H100 GPUs with CUDA 11.8 through CUDA 12.0. @@ -83,24 +84,20 @@ CUDA 11.8 through CUDA 12.0. * Download the [PUB_KEY](https://keys.openpgp.org/search?q=sw-spark@nvidia.com). 
* Import the public key: `gpg --import PUB_KEY` * Verify the signature for Scala 2.12 jar: - `gpg --verify rapids-4-spark_2.12-24.04.1.jar.asc rapids-4-spark_2.12-24.04.1.jar` + `gpg --verify rapids-4-spark_2.12-24.06.0.jar.asc rapids-4-spark_2.12-24.06.0.jar` * Verify the signature for Scala 2.13 jar: - `gpg --verify rapids-4-spark_2.13-24.04.1.jar.asc rapids-4-spark_2.13-24.04.1.jar` + `gpg --verify rapids-4-spark_2.13-24.06.0.jar.asc rapids-4-spark_2.13-24.06.0.jar` The output of signature verify: gpg: Good signature from "NVIDIA Spark (For the signature of spark-rapids release jars) " ### Release Notes -* New functionality and performance improvements for this release include: -* Performance improvements for S3 reading. -Refer to perfio.s3.enabled in [advanced_configs](./additional-functionality/advanced_configs.md) for more details. -* Performance improvements when doing a joins on unique keys. -* Enhanced decompression kernels for zstd and snappy. -* Enhanced Parquet reading performance with modular kernels. -* Added compatibility with Spark version 3.5.1. -* Deprecated support for Databricks 10.4 ML LTS. -* For updates on RAPIDS Accelerator Tools, please visit [this link](https://github.com/NVIDIA/spark-rapids-tools/releases). +* Improved support for Unity Catalog on Databricks +* Added support for parse_url PATH +* Added support for array_filter +* Added support for Spark 3.4.3 +* For updates on RAPIDS Accelerator Tools, please visit [this link](https://github.com/NVIDIA/spark-rapids-tools/releases) For a detailed list of changes, please refer to the [CHANGELOG](https://github.com/NVIDIA/spark-rapids/blob/main/CHANGELOG.md). diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh index dec93e6f22a..18c26aa26e7 100755 --- a/integration_tests/run_pyspark_from_build.sh +++ b/integration_tests/run_pyspark_from_build.sh @@ -171,11 +171,16 @@ else TEST_TYPE_PARAM="--test_type $TEST_TYPE" fi + # We found that test speed degrades as parallelism increases beyond 8, so we set the default maximum parallelism to 8. + # Note that MAX_PARALLEL varies with the hardware, OS, and test case. Please override it with an appropriate value if needed.
+ MAX_PARALLEL=${MAX_PARALLEL:-8} if [[ ${TEST_PARALLEL} -lt 2 ]]; then # With xdist 0 and 1 are the same parallelism but # 0 is more efficient TEST_PARALLEL_OPTS=() + elif [[ ${TEST_PARALLEL} -gt ${MAX_PARALLEL} ]]; then + TEST_PARALLEL_OPTS=("-n" "$MAX_PARALLEL") else TEST_PARALLEL_OPTS=("-n" "$TEST_PARALLEL") fi @@ -245,6 +250,12 @@ else DRIVER_EXTRA_JAVA_OPTIONS="-ea -Duser.timezone=$TZ -Ddelta.log.cacheSize=$deltaCacheSize" export PYSP_TEST_spark_driver_extraJavaOptions="$DRIVER_EXTRA_JAVA_OPTIONS $COVERAGE_SUBMIT_FLAGS" export PYSP_TEST_spark_executor_extraJavaOptions="-ea -Duser.timezone=$TZ" + + # Set driver memory to speed up tests such as deltalake + if [[ -n "${DRIVER_MEMORY}" ]]; then + export PYSP_TEST_spark_driver_memory="${DRIVER_MEMORY}" + fi + export PYSP_TEST_spark_ui_showConsoleProgress='false' export PYSP_TEST_spark_sql_session_timeZone=$TZ export PYSP_TEST_spark_sql_shuffle_partitions='4' diff --git a/integration_tests/src/main/python/data_gen.py b/integration_tests/src/main/python/data_gen.py index 2e6c36b77d9..fb1627af75b 100644 --- a/integration_tests/src/main/python/data_gen.py +++ b/integration_tests/src/main/python/data_gen.py @@ -159,7 +159,8 @@ def __repr__(self): return super().__repr__() + '(' + str(self._child_gen) + ')' def _cache_repr(self): - return super()._cache_repr() + '(' + self._child_gen._cache_repr() + ')' + return (super()._cache_repr() + '(' + self._child_gen._cache_repr() + + ',' + str(self._func.__code__) + ')' ) def start(self, rand): self._child_gen.start(rand) @@ -667,7 +668,10 @@ def __repr__(self): return super().__repr__() + '(' + str(self._child_gen) + ')' def _cache_repr(self): - return super()._cache_repr() + '(' + self._child_gen._cache_repr() + ')' + return (super()._cache_repr() + '(' + self._child_gen._cache_repr() + + ',' + str(self._min_length) + ',' + str(self._max_length) + ',' + + str(self.all_null) + ',' + str(self.convert_to_tuple) + ')') + def start(self, rand): self._child_gen.start(rand) @@ -701,7 +705,8 @@ def __repr__(self): return super().__repr__() + '(' + str(self._key_gen) + ',' + str(self._value_gen) + ')' def _cache_repr(self): - return super()._cache_repr() + '(' + self._key_gen._cache_repr() + ',' + self._value_gen._cache_repr() + ')' + return (super()._cache_repr() + '(' + self._key_gen._cache_repr() + ',' + self._value_gen._cache_repr() + + ',' + str(self._min_length) + ',' + str(self._max_length) + ')') def start(self, rand): self._key_gen.start(rand) @@ -769,12 +774,13 @@ def __init__(self, min_value=MIN_DAY_TIME_INTERVAL, max_value=MAX_DAY_TIME_INTER self._min_micros = (math.floor(min_value.total_seconds()) * 1000000) + min_value.microseconds self._max_micros = (math.floor(max_value.total_seconds()) * 1000000) + max_value.microseconds fields = ["day", "hour", "minute", "second"] - start_index = fields.index(start_field) - end_index = fields.index(end_field) - if start_index > end_index: + self._start_index = fields.index(start_field) + self._end_index = fields.index(end_field) + if self._start_index > self._end_index: raise RuntimeError('Start field {}, end field {}, valid fields is {}, start field index should <= end ' 'field index'.format(start_field, end_field, fields)) - super().__init__(DayTimeIntervalType(start_index, end_index), nullable=nullable, special_cases=special_cases) + super().__init__(DayTimeIntervalType(self._start_index, self._end_index), nullable=nullable, + special_cases=special_cases) def _gen_random(self, rand): micros = rand.randint(self._min_micros, self._max_micros) @@ -784,7 
+790,8 @@ def _gen_random(self, rand): return timedelta(microseconds=micros) def _cache_repr(self): - return super()._cache_repr() + '(' + str(self._min_micros) + ',' + str(self._max_micros) + ')' + return (super()._cache_repr() + '(' + str(self._min_micros) + ',' + str(self._max_micros) + + ',' + str(self._start_index) + ',' + str(self._end_index) + ')') def start(self, rand): self._start(rand, lambda: self._gen_random(rand)) diff --git a/integration_tests/src/main/python/fastparquet_compatibility_test.py b/integration_tests/src/main/python/fastparquet_compatibility_test.py index 53a99d32bd2..4b0fc2827f4 100644 --- a/integration_tests/src/main/python/fastparquet_compatibility_test.py +++ b/integration_tests/src/main/python/fastparquet_compatibility_test.py @@ -30,6 +30,8 @@ def fastparquet_unavailable(): return False except ImportError: return True + except ValueError: # TODO: remove when https://github.com/NVIDIA/spark-rapids/issues/11070 is fixed + return True rebase_write_corrected_conf = { diff --git a/integration_tests/src/main/python/hive_parquet_write_test.py b/integration_tests/src/main/python/hive_parquet_write_test.py new file mode 100644 index 00000000000..f62439a39af --- /dev/null +++ b/integration_tests/src/main/python/hive_parquet_write_test.py @@ -0,0 +1,176 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from asserts import assert_gpu_and_cpu_sql_writes_are_equal_collect +from conftest import is_databricks_runtime +from data_gen import * +from hive_write_test import _restricted_timestamp +from marks import allow_non_gpu, ignore_order +from spark_session import with_cpu_session, is_before_spark_320, is_spark_350_or_later + +# Disable the meta conversion from Hive write to FrameData write in Spark, to test +# "GpuInsertIntoHiveTable" for Parquet write. 
+_write_to_hive_conf = {"spark.sql.hive.convertMetastoreParquet": False} + +_hive_basic_gens = [ + byte_gen, short_gen, int_gen, long_gen, float_gen, double_gen, string_gen, boolean_gen, + DateGen(start=date(1590, 1, 1)), _restricted_timestamp(), + DecimalGen(precision=19, scale=1, nullable=True), + DecimalGen(precision=23, scale=5, nullable=True), + DecimalGen(precision=36, scale=3, nullable=True)] + +_hive_basic_struct_gen = StructGen( + [['c'+str(ind), c_gen] for ind, c_gen in enumerate(_hive_basic_gens)]) + +_hive_struct_gens = [ + _hive_basic_struct_gen, + StructGen([['child0', byte_gen], ['child1', _hive_basic_struct_gen]]), + StructGen([['child0', ArrayGen(short_gen)], ['child1', double_gen]])] + +_hive_array_gens = [ArrayGen(sub_gen) for sub_gen in _hive_basic_gens] + [ + ArrayGen(ArrayGen(short_gen, max_length=10), max_length=10), + ArrayGen(ArrayGen(string_gen, max_length=10), max_length=10), + ArrayGen(StructGen([['child0', byte_gen], ['child1', string_gen], ['child2', float_gen]]))] + +_hive_map_gens = [simple_string_to_string_map_gen] + [MapGen(f(nullable=False), f()) for f in [ + BooleanGen, ByteGen, ShortGen, IntegerGen, LongGen, FloatGen, DoubleGen, + lambda nullable=True: _restricted_timestamp(nullable=nullable), + lambda nullable=True: DateGen(start=date(1590, 1, 1), nullable=nullable), + lambda nullable=True: DecimalGen(precision=19, scale=1, nullable=nullable), + lambda nullable=True: DecimalGen(precision=36, scale=5, nullable=nullable)]] + +_hive_write_gens = [_hive_basic_gens, _hive_struct_gens, _hive_array_gens, _hive_map_gens] + +# ProjectExec falls back on databricks due to no GPU version of "MapFromArrays". +fallback_nodes = ['ProjectExec'] if is_databricks_runtime() or is_spark_350_or_later() else [] + + +@allow_non_gpu(*(non_utc_allow + fallback_nodes)) +@ignore_order(local=True) +@pytest.mark.parametrize("is_ctas", [True, False], ids=['CTAS', 'CTTW']) +@pytest.mark.parametrize("gens", _hive_write_gens, ids=idfn) +def test_write_parquet_into_hive_table(spark_tmp_table_factory, is_ctas, gens): + + def gen_table(spark): + gen_list = [('_c' + str(i), gen) for i, gen in enumerate(gens)] + types_sql_str = ','.join('{} {}'.format( + name, gen.data_type.simpleString()) for name, gen in gen_list) + data_table = spark_tmp_table_factory.get() + gen_df(spark, gen_list).createOrReplaceTempView(data_table) + return data_table, types_sql_str + + (input_table, input_schema) = with_cpu_session(gen_table) + + def write_to_hive_sql(spark, output_table): + if is_ctas: + # Create Table As Select + return [ + "CREATE TABLE {} STORED AS PARQUET AS SELECT * FROM {}".format( + output_table, input_table) + ] + else: + # Create Table Then Write + return [ + "CREATE TABLE {} ({}) STORED AS PARQUET".format(output_table, input_schema), + "INSERT OVERWRITE TABLE {} SELECT * FROM {}".format(output_table, input_table) + ] + + assert_gpu_and_cpu_sql_writes_are_equal_collect( + spark_tmp_table_factory, + write_to_hive_sql, + _write_to_hive_conf) + + +@allow_non_gpu(*non_utc_allow) +@ignore_order(local=True) +@pytest.mark.parametrize("is_static", [True, False], ids=['Static_Partition', 'Dynamic_Partition']) +def test_write_parquet_into_partitioned_hive_table(spark_tmp_table_factory, is_static): + # Generate hive table in Parquet format + def gen_table(spark): + # gen_list = [('_c' + str(i), gen) for i, gen in enumerate(gens)] + dates = [date(2024, 2, 28), date(2024, 2, 27), date(2024, 2, 26)] + gen_list = [('a', int_gen), + ('b', long_gen), + ('c', short_gen), + ('d', string_gen), + ('part', 
SetValuesGen(DateType(), dates))] + data_table = spark_tmp_table_factory.get() + gen_df(spark, gen_list).createOrReplaceTempView(data_table) + return data_table + + input_table = with_cpu_session(gen_table) + + def partitioned_write_to_hive_sql(spark, output_table): + sql_create_part_table = ( + "CREATE TABLE {} (a INT, b LONG, c SHORT, d STRING) " + "PARTITIONED BY (part DATE) STORED AS PARQUET" + ).format(output_table) + if is_static: + return [ + # sql_1: Create partitioned hive table + sql_create_part_table, + # sql_2: Static partition write only to the partition part='2024-02-25' + "INSERT OVERWRITE TABLE {} PARTITION (part='2024-02-25') " + "SELECT a, b, c, d FROM {}".format(output_table, input_table) + ] + else: + return [ + # sql_1: Create partitioned hive table + sql_create_part_table, + # sql_2: Dynamic partition write + "INSERT OVERWRITE TABLE {} SELECT * FROM {}".format(output_table, input_table) + ] + all_confs = copy_and_update(_write_to_hive_conf, { + "hive.exec.dynamic.partition.mode": "nonstrict"}) + assert_gpu_and_cpu_sql_writes_are_equal_collect( + spark_tmp_table_factory, + partitioned_write_to_hive_sql, + all_confs) + + +zstd_param = pytest.param('ZSTD', + marks=pytest.mark.skipif(is_before_spark_320(), reason="zstd is not supported before 320")) + +@allow_non_gpu(*(non_utc_allow + fallback_nodes)) +@ignore_order(local=True) +@pytest.mark.parametrize("comp_type", ['UNCOMPRESSED', 'SNAPPY', zstd_param]) +def test_write_compressed_parquet_into_hive_table(spark_tmp_table_factory, comp_type): + # Generate hive table in Parquet format + def gen_table(spark): + gens = _hive_basic_gens + _hive_struct_gens + _hive_array_gens + _hive_map_gens + gen_list = [('_c' + str(i), gen) for i, gen in enumerate(gens)] + types_sql_str = ','.join('{} {}'.format( + name, gen.data_type.simpleString()) for name, gen in gen_list) + data_table = spark_tmp_table_factory.get() + gen_df(spark, gen_list).createOrReplaceTempView(data_table) + return data_table, types_sql_str + + input_table, schema_str = with_cpu_session(gen_table) + + def write_to_hive_sql(spark, output_table): + return [ + # Create table with compression type + "CREATE TABLE {} ({}) STORED AS PARQUET " + "TBLPROPERTIES ('parquet.compression'='{}')".format( + output_table, schema_str, comp_type), + # Insert into table + "INSERT OVERWRITE TABLE {} SELECT * FROM {}".format(output_table, input_table) + ] + + assert_gpu_and_cpu_sql_writes_are_equal_collect( + spark_tmp_table_factory, + write_to_hive_sql, + _write_to_hive_conf) diff --git a/integration_tests/src/main/python/parquet_write_test.py b/integration_tests/src/main/python/parquet_write_test.py index 99a2d4241e8..38dab9e84a4 100644 --- a/integration_tests/src/main/python/parquet_write_test.py +++ b/integration_tests/src/main/python/parquet_write_test.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.
@@ -224,6 +224,10 @@ def test_all_null_int96(spark_tmp_path): class AllNullTimestampGen(TimestampGen): def start(self, rand): self._start(rand, lambda : None) + + def _cache_repr(self): + return super()._cache_repr() + '(all_nulls)' + data_path = spark_tmp_path + '/PARQUET_DATA' confs = copy_and_update(writer_confs, {'spark.sql.parquet.outputTimestampType': 'INT96'}) assert_gpu_and_cpu_writes_are_equal_collect( diff --git a/integration_tests/src/main/python/regexp_test.py b/integration_tests/src/main/python/regexp_test.py index 89929eb6762..18a83870d83 100644 --- a/integration_tests/src/main/python/regexp_test.py +++ b/integration_tests/src/main/python/regexp_test.py @@ -454,6 +454,7 @@ def test_rlike_rewrite_optimization(): 'rlike(a, "(.*)(abb)(.*)")', 'rlike(a, "^(abb)(.*)")', 'rlike(a, "^abb")', + 'rlike(a, "^.*(aaa)")', 'rlike(a, "\\\\A(abb)(.*)")', 'rlike(a, "\\\\Aabb")', 'rlike(a, "^(abb)\\\\Z")', @@ -466,7 +467,12 @@ def test_rlike_rewrite_optimization(): 'rlike(a, "ab[a-c]{3}")', 'rlike(a, "a[a-c]{1,3}")', 'rlike(a, "a[a-c]{1,}")', - 'rlike(a, "a[a-c]+")'), + 'rlike(a, "a[a-c]+")', + 'rlike(a, "(aaa|bbb|ccc)")', + 'rlike(a, ".*.*(aaa|bbb).*.*")', + 'rlike(a, "^.*(aaa|bbb|ccc)")', + 'rlike(a, "aaa|bbb")', + 'rlike(a, "aaa|(bbb|ccc)")'), conf=_regexp_conf) def test_regexp_replace_character_set_negated(): diff --git a/integration_tests/src/main/python/spark_session.py b/integration_tests/src/main/python/spark_session.py index 78e0b08a651..c55f1976497 100644 --- a/integration_tests/src/main/python/spark_session.py +++ b/integration_tests/src/main/python/spark_session.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -220,6 +220,9 @@ def is_spark_341(): def is_spark_350_or_later(): return spark_version() >= "3.5.0" +def is_spark_351_or_later(): + return spark_version() >= "3.5.1" + def is_spark_330(): return spark_version() == "3.3.0" diff --git a/jenkins/Jenkinsfile-blossom.premerge b/jenkins/Jenkinsfile-blossom.premerge index e7bb8af2cdd..d61638d901a 100755 --- a/jenkins/Jenkinsfile-blossom.premerge +++ b/jenkins/Jenkinsfile-blossom.premerge @@ -57,7 +57,8 @@ pipeline { } parameters { - string(name: 'REF', defaultValue: '', + // Put a default value for REF to avoid errors when running the pipeline manually + string(name: 'REF', defaultValue: 'main', description: 'Merged commit of specific PR') string(name: 'GITHUB_DATA', defaultValue: '', description: 'Json-formatted github data from upstream blossom-ci') @@ -273,7 +274,7 @@ git --no-pager diff --name-only HEAD \$BASE -- ${PREMERGE_DOCKERFILE} || true""" } } // end of Unit Test stage - stage('Databricks') { + stage('Databricks IT part1') { when { expression { db_build } } @@ -284,17 +285,42 @@ git --no-pager diff --name-only HEAD \$BASE -- ${PREMERGE_DOCKERFILE} || true""" propagate: false, wait: true, parameters: [ string(name: 'REF', value: params.REF), - string(name: 'GITHUB_DATA', value: params.GITHUB_DATA) + string(name: 'GITHUB_DATA', value: params.GITHUB_DATA), + string(name: 'TEST_MODE', value: 'CI_PART1') ]) if ( DBJob.result != 'SUCCESS' ) { // Output Databricks failure logs to uploaded onto the pre-merge PR print(DBJob.getRawBuild().getLog()) // Fail the pipeline - error "Databricks build result : " + DBJob.result + error "Databricks part1 result : " + DBJob.result } } } - } // end of Databricks + } // end of Databricks IT part1 + + stage('Databricks IT part2') { + when { + expression { db_build } + } + steps { + script { + githubHelper.updateCommitStatus("", "Running - includes databricks", GitHubCommitState.PENDING) + def DBJob = build(job: 'rapids-databricks_premerge-github', + propagate: false, wait: true, + parameters: [ + string(name: 'REF', value: params.REF), + string(name: 'GITHUB_DATA', value: params.GITHUB_DATA), + string(name: 'TEST_MODE', value: 'CI_PART2') + ]) + if ( DBJob.result != 'SUCCESS' ) { + // Output Databricks failure logs to be uploaded onto the pre-merge PR + print(DBJob.getRawBuild().getLog()) + // Fail the pipeline + error "Databricks part2 result : " + DBJob.result + } + } + } + } // end of Databricks IT part2 stage('Dummy stage: blue ocean log view') { steps { diff --git a/jenkins/Jenkinsfile-blossom.premerge-databricks b/jenkins/Jenkinsfile-blossom.premerge-databricks index a13170f7162..5b0a2bf1226 100644 --- a/jenkins/Jenkinsfile-blossom.premerge-databricks +++ b/jenkins/Jenkinsfile-blossom.premerge-databricks @@ -46,10 +46,13 @@ pipeline { } parameters { - string(name: 'REF', defaultValue: '', + // Put a default value for REF to avoid errors when running the pipeline manually + string(name: 'REF', defaultValue: 'main', description: 'Merged commit of specific PR') string(name: 'GITHUB_DATA', defaultValue: '', description: 'Json-formatted github data from upstream blossom-ci') + choice(name: 'TEST_MODE', choices: ['CI_PART1', 'CI_PART2'], + description: 'Separate integration tests into 2 parts, and run each part in parallel') } environment { @@ -177,7 +180,7 @@ void databricksBuild() { container('cpu') { try { withCredentials([file(credentialsId: 'SPARK_DATABRICKS_PRIVKEY', variable: 'DATABRICKS_PRIVKEY')]) { - def TEST_PARAMS = " 
-w $DATABRICKS_HOST -t $DATABRICKS_TOKEN -c $CLUSTER_ID -e TEST_MODE=$TEST_MODE" + " -p $DATABRICKS_PRIVKEY -l ./jenkins/databricks/test.sh -v $BASE_SPARK_VERSION -d /home/ubuntu/test.sh" if (params.SPARK_CONF) { TEST_PARAMS += " -f ${params.SPARK_CONF}" diff --git a/jenkins/databricks/init_cudf_udf.sh b/jenkins/databricks/init_cudf_udf.sh index d5c440bfbb2..3c3e73ab582 100755 --- a/jenkins/databricks/init_cudf_udf.sh +++ b/jenkins/databricks/init_cudf_udf.sh @@ -20,7 +20,7 @@ set -ex -CUDF_VER=${CUDF_VER:-24.06} # TODO: https://github.com/NVIDIA/spark-rapids/issues/ +CUDF_VER=${CUDF_VER:-24.08} CUDA_VER=${CUDA_VER:-11.8} # Need to explicitly add conda into PATH environment, to activate conda environment. diff --git a/jenkins/databricks/install_deps.py b/jenkins/databricks/install_deps.py index be5cb9bc040..8d21a4f9556 100644 --- a/jenkins/databricks/install_deps.py +++ b/jenkins/databricks/install_deps.py @@ -115,8 +115,10 @@ def define_deps(spark_version, scala_version): f'{prefix_ws_sp_mvn_hadoop}--org.json4s--json4s-jackson_{scala_version}--org.json4s__json4s-jackson_{scala_version}__*.jar'), Artifact('org.javaassist', 'javaassist', f'{prefix_ws_sp_mvn_hadoop}--org.javassist--javassist--org.javassist__javassist__*.jar'), - Artifact('com.fasterxml.jackson.core', 'jackson-core', + Artifact('com.fasterxml.jackson.core', 'jackson-databind', f'{prefix_ws_sp_mvn_hadoop}--com.fasterxml.jackson.core--jackson-databind--com.fasterxml.jackson.core__jackson-databind__*.jar'), + Artifact('com.fasterxml.jackson.core', 'jackson-core', + f'{prefix_ws_sp_mvn_hadoop}--com.fasterxml.jackson.core--jackson-core--com.fasterxml.jackson.core__jackson-core__*.jar'), Artifact('com.fasterxml.jackson.core', 'jackson-annotations', f'{prefix_ws_sp_mvn_hadoop}--com.fasterxml.jackson.core--jackson-annotations--com.fasterxml.jackson.core__jackson-annotations__*.jar'), Artifact('org.apache.spark', f'spark-avro_{scala_version}', diff --git a/jenkins/databricks/test.sh b/jenkins/databricks/test.sh index 404dcd97578..c966d5a92f7 100755 --- a/jenkins/databricks/test.sh +++ b/jenkins/databricks/test.sh @@ -1,6 +1,6 @@ #!/bin/bash # -# Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -59,15 +59,13 @@ IS_SPARK_321_OR_LATER=0 # - DELTA_LAKE_ONLY: delta_lake tests only # - MULTITHREADED_SHUFFLE: shuffle tests only # - PYARROW_ONLY: pyarrow tests only +# - CI_PART1 or CI_PART2 : part1 or part2 of the tests run in parallel from CI TEST_MODE=${TEST_MODE:-'DEFAULT'} # Classloader config is here to work around classloader issues with # --packages in distributed setups, should be fixed by # https://github.com/NVIDIA/spark-rapids/pull/5646 -# Increase driver memory as Delta Lake tests can slowdown with default 1G (possibly due to caching?) 
-DELTA_LAKE_CONFS="--driver-memory 2g" - # Enable event log for qualification & profiling tools testing export PYSP_TEST_spark_eventLog_enabled=true mkdir -p /tmp/spark-events @@ -89,32 +87,30 @@ run_pyarrow_tests() { bash integration_tests/run_pyspark_from_build.sh -m pyarrow_test --pyarrow_test --runtime_env="databricks" --test_type=$TEST_TYPE } -## limit parallelism to avoid OOM kill -export TEST_PARALLEL=${TEST_PARALLEL:-4} - -if [[ $TEST_MODE == "DEFAULT" ]]; then +## Separate the integration tests into "CI_PART1" and "CI_PART2", run each part in parallel on separate Databricks clusters to speed up the testing process. +if [[ $TEST_MODE == "DEFAULT" || $TEST_MODE == "CI_PART1" ]]; then bash integration_tests/run_pyspark_from_build.sh --runtime_env="databricks" --test_type=$TEST_TYPE +fi +## Run tests with jars building from the spark-rapids source code +if [[ "$(pwd)" == "$SOURCE_PATH" ]]; then ## Run cache tests - if [[ "$IS_SPARK_321_OR_LATER" -eq "1" ]]; then + if [[ "$IS_SPARK_321_OR_LATER" -eq "1" && ("$TEST_MODE" == "DEFAULT" || $TEST_MODE == "CI_PART2") ]]; then PYSP_TEST_spark_sql_cache_serializer=${PCBS_CONF} \ bash integration_tests/run_pyspark_from_build.sh --runtime_env="databricks" --test_type=$TEST_TYPE -k cache_test fi -fi -## Run tests with jars building from the spark-rapids source code -if [ "$(pwd)" == "$SOURCE_PATH" ]; then - if [[ "$TEST_MODE" == "DEFAULT" || "$TEST_MODE" == "DELTA_LAKE_ONLY" ]]; then + if [[ "$TEST_MODE" == "DEFAULT" || $TEST_MODE == "CI_PART2" || "$TEST_MODE" == "DELTA_LAKE_ONLY" ]]; then ## Run Delta Lake tests - SPARK_SUBMIT_FLAGS="$SPARK_CONF $DELTA_LAKE_CONFS" TEST_PARALLEL=1 \ + DRIVER_MEMORY="4g" \ bash integration_tests/run_pyspark_from_build.sh --runtime_env="databricks" -m "delta_lake" --delta_lake --test_type=$TEST_TYPE fi - if [[ "$TEST_MODE" == "DEFAULT" || "$TEST_MODE" == "MULTITHREADED_SHUFFLE" ]]; then + if [[ "$TEST_MODE" == "DEFAULT" || $TEST_MODE == "CI_PART2" || "$TEST_MODE" == "MULTITHREADED_SHUFFLE" ]]; then ## Mutithreaded Shuffle test rapids_shuffle_smoke_test fi - if [[ "$TEST_MODE" == "DEFAULT" || "$TEST_MODE" == "PYARROW_ONLY" ]]; then + if [[ "$TEST_MODE" == "DEFAULT" || $TEST_MODE == "CI_PART2" || "$TEST_MODE" == "PYARROW_ONLY" ]]; then # Pyarrow tests run_pyarrow_tests fi diff --git a/jenkins/spark-premerge-build.sh b/jenkins/spark-premerge-build.sh index 883b3f3acfc..697722c0138 100755 --- a/jenkins/spark-premerge-build.sh +++ b/jenkins/spark-premerge-build.sh @@ -78,7 +78,7 @@ mvn_verify() { # Here run Python integration tests tagged with 'premerge_ci_1' only, that would help balance test duration and memory # consumption from two k8s pods running in parallel, which executes 'mvn_verify()' and 'ci_2()' respectively. $MVN_CMD -B $MVN_URM_MIRROR $PREMERGE_PROFILES clean verify -Dpytest.TEST_TAGS="premerge_ci_1" \ - -Dpytest.TEST_TYPE="pre-commit" -Dpytest.TEST_PARALLEL=4 -Dcuda.version=$CLASSIFIER + -Dpytest.TEST_TYPE="pre-commit" -Dcuda.version=$CLASSIFIER # The jacoco coverage should have been collected, but because of how the shade plugin # works and jacoco we need to clean some things up so jacoco will only report for the @@ -162,7 +162,6 @@ ci_2() { $MVN_CMD -U -B $MVN_URM_MIRROR clean package $MVN_BUILD_ARGS -DskipTests=true export TEST_TAGS="not premerge_ci_1" export TEST_TYPE="pre-commit" - export TEST_PARALLEL=5 # Download a Scala 2.12 build of spark prepare_spark $SPARK_VER 2.12 @@ -206,7 +205,6 @@ ci_scala213() { cd .. 
# Run integration tests in the project root dir to leverage test cases and resource files export TEST_TAGS="not premerge_ci_1" export TEST_TYPE="pre-commit" - export TEST_PARALLEL=5 # SPARK_HOME (and related) must be set to a Spark built with Scala 2.13 SPARK_HOME=$SPARK_HOME PYTHONPATH=$PYTHONPATH \ ./integration_tests/run_pyspark_from_build.sh diff --git a/jenkins/version-def.sh b/jenkins/version-def.sh index d3c01e1eba4..dbad6d6fd94 100755 --- a/jenkins/version-def.sh +++ b/jenkins/version-def.sh @@ -27,7 +27,7 @@ done IFS=$PRE_IFS -CUDF_VER=${CUDF_VER:-"24.06.0-SNAPSHOT"} # TODO: https://github.com/NVIDIA/spark-rapids/issues/ +CUDF_VER=${CUDF_VER:-"24.08.0-SNAPSHOT"} CUDA_CLASSIFIER=${CUDA_CLASSIFIER:-"cuda11"} CLASSIFIER=${CLASSIFIER:-"$CUDA_CLASSIFIER"} # default as CUDA_CLASSIFIER for compatibility PROJECT_VER=${PROJECT_VER:-"24.08.0-SNAPSHOT"} diff --git a/pom.xml b/pom.xml index df010a7589e..06947857521 100644 --- a/pom.xml +++ b/pom.xml @@ -719,9 +719,8 @@ spark${buildver} cuda11 ${cuda.version} - - 24.06.0-SNAPSHOT - 24.06.0-SNAPSHOT + 24.08.0-SNAPSHOT + 24.08.0-SNAPSHOT 2.12 2.8.0 incremental @@ -887,6 +886,7 @@ 340, 341, 342, + 343, 350, 351 diff --git a/scala2.13/pom.xml b/scala2.13/pom.xml index 711872e8d54..cbc4aecbd26 100644 --- a/scala2.13/pom.xml +++ b/scala2.13/pom.xml @@ -719,9 +719,8 @@ spark${buildver} cuda11 ${cuda.version} - - 24.06.0-SNAPSHOT - 24.06.0-SNAPSHOT + 24.08.0-SNAPSHOT + 24.08.0-SNAPSHOT 2.13 2.8.0 incremental @@ -887,6 +886,7 @@ 340, 341, 342, + 343, 350, 351 diff --git a/scala2.13/shim-deps/databricks/pom.xml b/scala2.13/shim-deps/databricks/pom.xml index b342f381c71..a0459901079 100644 --- a/scala2.13/shim-deps/databricks/pom.xml +++ b/scala2.13/shim-deps/databricks/pom.xml @@ -105,6 +105,12 @@ ${spark.version} compile + + com.fasterxml.jackson.core + jackson-databind + ${spark.version} + compile + com.fasterxml.jackson.core jackson-annotations @@ -286,4 +292,4 @@ compile - \ No newline at end of file + diff --git a/shim-deps/databricks/pom.xml b/shim-deps/databricks/pom.xml index bef8a90d227..22842b0f7c0 100644 --- a/shim-deps/databricks/pom.xml +++ b/shim-deps/databricks/pom.xml @@ -105,6 +105,12 @@ ${spark.version} compile + + com.fasterxml.jackson.core + jackson-databind + ${spark.version} + compile + com.fasterxml.jackson.core jackson-annotations @@ -286,4 +292,4 @@ compile - \ No newline at end of file + diff --git a/sql-plugin-api/src/main/scala/com/nvidia/spark/rapids/ShimLoader.scala b/sql-plugin-api/src/main/scala/com/nvidia/spark/rapids/ShimLoader.scala index 36abc75ba87..2d7a51c4e43 100644 --- a/sql-plugin-api/src/main/scala/com/nvidia/spark/rapids/ShimLoader.scala +++ b/sql-plugin-api/src/main/scala/com/nvidia/spark/rapids/ShimLoader.scala @@ -40,19 +40,19 @@ import org.apache.spark.util.MutableURLClassLoader "parallel worlds" in the JDK's com.sun.istack.internal.tools.ParallelWorldClassLoader parlance 1. a few publicly documented classes in the conventional layout at the top 2. a large fraction of classes whose bytecode is identical under all supported Spark versions - in spark3xx-common + in spark-shared 3. 
a smaller fraction of classes that differ under one of the supported Spark versions com/nvidia/spark/SQLPlugin.class - spark3xx-common/com/nvidia/spark/rapids/CastExprMeta.class + spark-shared/com/nvidia/spark/rapids/CastExprMeta.class spark311/org/apache/spark/sql/rapids/GpuUnaryMinus.class spark320/org/apache/spark/sql/rapids/GpuUnaryMinus.class Each shim can see a consistent parallel world without conflicts by referencing only one conflicting directory. E.g., Spark 3.2.0 Shim will use - jar:file:/home/spark/rapids-4-spark_2.12-24.08.0.jar!/spark3xx-common/ + jar:file:/home/spark/rapids-4-spark_2.12-24.08.0.jar!/spark-shared/ jar:file:/home/spark/rapids-4-spark_2.12-24.08.0.jar!/spark320/ Spark 3.1.1 will use - jar:file:/home/spark/rapids-4-spark_2.12-24.08.0.jar!/spark3xx-common/ + jar:file:/home/spark/rapids-4-spark_2.12-24.08.0.jar!/spark-shared/ jar:file:/home/spark/rapids-4-spark_2.12-24.08.0.jar!/spark311/ Using these Jar URL's allows referencing different bytecode produced from identical sources by incompatible Scala / Spark dependencies. @@ -67,7 +67,7 @@ object ShimLoader extends Logging { new URL(rootUrlStr) } - private val shimCommonURL = new URL(s"${shimRootURL.toString}spark3xx-common/") + private val shimCommonURL = new URL(s"${shimRootURL.toString}spark-shared/") @volatile private var shimProviderClass: String = _ @volatile private var shimProvider: SparkShimServiceProvider = _ @volatile private var shimURL: URL = _ diff --git a/sql-plugin/src/main/java/com/nvidia/spark/rapids/CudfUnsafeRow.java b/sql-plugin/src/main/java/com/nvidia/spark/rapids/CudfUnsafeRow.java deleted file mode 100644 index d25500a77b2..00000000000 --- a/sql-plugin/src/main/java/com/nvidia/spark/rapids/CudfUnsafeRow.java +++ /dev/null @@ -1,400 +0,0 @@ -/* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.nvidia.spark.rapids; - -import org.apache.spark.sql.catalyst.InternalRow; -import org.apache.spark.sql.catalyst.expressions.Attribute; -import org.apache.spark.sql.catalyst.expressions.SpecializedGettersReader; -import org.apache.spark.sql.catalyst.util.ArrayData; -import org.apache.spark.sql.catalyst.util.MapData; -import org.apache.spark.sql.types.DataType; -import org.apache.spark.sql.types.Decimal; -import org.apache.spark.unsafe.Platform; -import org.apache.spark.unsafe.array.ByteArrayMethods; -import org.apache.spark.unsafe.hash.Murmur3_x86_32; -import org.apache.spark.unsafe.types.CalendarInterval; -import org.apache.spark.unsafe.types.UTF8String; - -import java.util.Arrays; - -/** - * This is an InternalRow implementation based off of UnsafeRow, but follows a format for use with - * the row format supported by cudf. In this format each column is padded to match the alignment - * needed by it, and validity is placed at the end one byte at a time. 
- * - * It also supports remapping the columns so that if the columns were re-ordered to reduce packing - * in the format, then they can be mapped back to their original positions. - * - * This class is likely to go away once we move to code generation when going directly to an - * UnsafeRow through code generation. This is rather difficult because of some details in how - * UnsafeRow works. - */ -public final class CudfUnsafeRow extends InternalRow { - public static int alignOffset(int offset, int alignment) { - return (offset + alignment - 1) & -alignment; - } - - public static int calculateBitSetWidthInBytes(int numFields) { - return (numFields + 7)/ 8; - } - - public static int getRowSizeEstimate(Attribute[] attributes) { - // This needs to match what is in cudf and what is in the constructor. - int offset = 0; - for (Attribute attr : attributes) { - int length = GpuColumnVector.getNonNestedRapidsType(attr.dataType()).getSizeInBytes(); - offset = alignOffset(offset, length); - offset += length; - } - int bitSetWidthInBytes = calculateBitSetWidthInBytes(attributes.length); - // Each row is 64-bit aligned - return alignOffset(offset + bitSetWidthInBytes, 8); - } - - ////////////////////////////////////////////////////////////////////////////// - // Private fields and methods - ////////////////////////////////////////////////////////////////////////////// - - /** - * Address of where the row is stored in off heap memory. - */ - private long address; - - /** - * For each column the starting location to read from. The index to the is the position in - * the row bytes, not the user faceing ordinal. - */ - private int[] startOffsets; - - /** - * At what point validity data starts. - */ - private int fixedWidthSizeInBytes; - - /** - * The size of this row's backing data, in bytes. - */ - private int sizeInBytes; - - /** - * A mapping from the user facing ordinal to the index in the underlying row. - */ - private int[] remapping; - - /** - * Get the address where a field is stored. - * @param ordinal the user facing ordinal. - * @return the address of the field. - */ - private long getFieldAddressFromOrdinal(int ordinal) { - assertIndexIsValid(ordinal); - int i = remapping[ordinal]; - return address + startOffsets[i]; - } - - /** - * Verify that index is valid for this row. - * @param index in this case the index can be either the user facing ordinal or the index into the - * row. - */ - private void assertIndexIsValid(int index) { - assert index >= 0 : "index (" + index + ") should >= 0"; - assert index < startOffsets.length : "index (" + index + ") should < " + startOffsets.length; - } - - ////////////////////////////////////////////////////////////////////////////// - // Public methods - ////////////////////////////////////////////////////////////////////////////// - - /** - * Construct a new Row. The resulting row won't be usable until `pointTo()` has been called, - * since the value returned by this constructor is equivalent to a null pointer. - * - * @param attributes the schema of what this will hold. This is the schema of the underlying - * row, so if columns were re-ordered it is the attributes of the reordered - * data. - * @param remapping a mapping from the user requested column to the underlying column in the - * backing row. 
- */ - public CudfUnsafeRow(Attribute[] attributes, int[] remapping) { - int offset = 0; - startOffsets = new int[attributes.length]; - for (int i = 0; i < attributes.length; i++) { - Attribute attr = attributes[i]; - int length = GpuColumnVector.getNonNestedRapidsType(attr.dataType()).getSizeInBytes(); - assert length > 0 : "Only fixed width types are currently supported."; - offset = alignOffset(offset, length); - startOffsets[i] = offset; - offset += length; - } - fixedWidthSizeInBytes = offset; - this.remapping = remapping; - assert startOffsets.length == remapping.length; - } - - // for serializer - public CudfUnsafeRow() {} - - @Override - public int numFields() { return startOffsets.length; } - - /** - * Update this CudfUnsafeRow to point to different backing data. - * - * @param address the address in host memory for this. We should change this to be a - * MemoryBuffer class or something like that. - * @param sizeInBytes the size of this row's backing data, in bytes - */ - public void pointTo(long address, int sizeInBytes) { - assert startOffsets != null && startOffsets.length > 0 : "startOffsets not properly initialized"; - assert sizeInBytes % 8 == 0 : "sizeInBytes (" + sizeInBytes + ") should be a multiple of 8"; - this.address = address; - this.sizeInBytes = sizeInBytes; - } - - @Override - public void update(int ordinal, Object value) { - throw new UnsupportedOperationException(); - } - - @Override - public Object get(int ordinal, DataType dataType) { - // Don't remap the ordinal because it will be remapped in each of the other backing APIs - return SpecializedGettersReader.read(this, ordinal, dataType, true, true); - } - - @Override - public boolean isNullAt(int ordinal) { - int i = remapping[ordinal]; - assertIndexIsValid(i); - int validByteIndex = i / 8; - int validBitIndex = i % 8; - byte b = Platform.getByte(null, address + fixedWidthSizeInBytes + validByteIndex); - return ((1 << validBitIndex) & b) == 0; - } - - @Override - public void setNullAt(int ordinal) { - int i = remapping[ordinal]; - assertIndexIsValid(i); - int validByteIndex = i / 8; - int validBitIndex = i % 8; - byte b = Platform.getByte(null, address + fixedWidthSizeInBytes + validByteIndex); - b = (byte)((b & ~(1 << validBitIndex)) & 0xFF); - Platform.putByte(null, address + fixedWidthSizeInBytes + validByteIndex, b); - } - - @Override - public boolean getBoolean(int ordinal) { - return Platform.getBoolean(null, getFieldAddressFromOrdinal(ordinal)); - } - - @Override - public byte getByte(int ordinal) { - return Platform.getByte(null, getFieldAddressFromOrdinal(ordinal)); - } - - @Override - public short getShort(int ordinal) { - return Platform.getShort(null, getFieldAddressFromOrdinal(ordinal)); - } - - @Override - public int getInt(int ordinal) { - return Platform.getInt(null, getFieldAddressFromOrdinal(ordinal)); - } - - @Override - public long getLong(int ordinal) { - return Platform.getLong(null, getFieldAddressFromOrdinal(ordinal)); - } - - @Override - public float getFloat(int ordinal) { - return Platform.getFloat(null, getFieldAddressFromOrdinal(ordinal)); - } - - @Override - public double getDouble(int ordinal) { - return Platform.getDouble(null, getFieldAddressFromOrdinal(ordinal)); - } - - @Override - public Decimal getDecimal(int ordinal, int precision, int scale) { - if (isNullAt(ordinal)) { - return null; - } - if (precision <= Decimal.MAX_INT_DIGITS()) { - return Decimal.createUnsafe(getInt(ordinal), precision, scale); - } else if (precision <= Decimal.MAX_LONG_DIGITS()) { - return 
Decimal.createUnsafe(getLong(ordinal), precision, scale); - } else { - throw new IllegalArgumentException("NOT IMPLEMENTED YET"); -// byte[] bytes = getBinary(ordinal); -// BigInteger bigInteger = new BigInteger(bytes); -// BigDecimal javaDecimal = new BigDecimal(bigInteger, scale); -// return Decimal.apply(javaDecimal, precision, scale); - } - } - - @Override - public UTF8String getUTF8String(int ordinal) { -// if (isNullAt(ordinal)) return null; -// final long offsetAndSize = getLong(ordinal); -// final int offset = (int) (offsetAndSize >> 32); -// final int size = (int) offsetAndSize; -// return UTF8String.fromAddress(null, address + offset, size); - throw new IllegalArgumentException("NOT IMPLEMENTED YET"); - } - - @Override - public byte[] getBinary(int ordinal) { -// if (isNullAt(ordinal)) { -// return null; -// } else { -// final long offsetAndSize = getLong(ordinal); -// final int offset = (int) (offsetAndSize >> 32); -// final int size = (int) offsetAndSize; -// final byte[] bytes = new byte[size]; -// Platform.copyMemory( -// null, -// address + offset, -// bytes, -// Platform.BYTE_ARRAY_OFFSET, -// size -// ); -// return bytes; -// } - throw new IllegalArgumentException("NOT IMPLEMENTED YET"); - } - - @Override - public CalendarInterval getInterval(int ordinal) { -// if (isNullAt(ordinal)) { -// return null; -// } else { -// final long offsetAndSize = getLong(ordinal); -// final int offset = (int) (offsetAndSize >> 32); -// final int months = Platform.getInt(baseObject, address + offset); -// final int days = Platform.getInt(baseObject, address + offset + 4); -// final long microseconds = Platform.getLong(baseObject, address + offset + 8); -// return new CalendarInterval(months, days, microseconds); -// } - throw new IllegalArgumentException("NOT IMPLEMENTED YET"); - } - - @Override - public CudfUnsafeRow getStruct(int ordinal, int numFields) { -// if (isNullAt(ordinal)) { -// return null; -// } else { -// final long offsetAndSize = getLong(ordinal); -// final int offset = (int) (offsetAndSize >> 32); -// final int size = (int) offsetAndSize; -// final UnsafeRow row = new UnsafeRow(numFields); -// row.pointTo(baseObject, address + offset, size); -// return row; -// } - throw new IllegalArgumentException("NOT IMPLEMENTED YET"); - } - - @Override - public ArrayData getArray(int ordinal) { -// if (isNullAt(ordinal)) { -// return null; -// } else { -// final long offsetAndSize = getLong(ordinal); -// final int offset = (int) (offsetAndSize >> 32); -// final int size = (int) offsetAndSize; -// final UnsafeArrayData array = new UnsafeArrayData(); -// array.pointTo(baseObject, address + offset, size); -// return array; -// } - throw new IllegalArgumentException("NOT IMPLEMENTED YET"); - } - - @Override - public MapData getMap(int ordinal) { -// if (isNullAt(ordinal)) { -// return null; -// } else { -// final long offsetAndSize = getLong(ordinal); -// final int offset = (int) (offsetAndSize >> 32); -// final int size = (int) offsetAndSize; -// final UnsafeMapData map = new UnsafeMapData(); -// map.pointTo(baseObject, address + offset, size); -// return map; -// } - throw new IllegalArgumentException("NOT IMPLEMENTED YET"); - } - - /** - * Copies this row, returning a self-contained UnsafeRow that stores its data in an internal - * byte array rather than referencing data stored in a data page. 
- */ - @Override - public CudfUnsafeRow copy() { -// UnsafeRow rowCopy = new UnsafeRow(numFields); -// final byte[] rowDataCopy = new byte[sizeInBytes]; -// Platform.copyMemory( -// baseObject, -// address, -// rowDataCopy, -// Platform.BYTE_ARRAY_OFFSET, -// sizeInBytes -// ); -// rowCopy.pointTo(rowDataCopy, Platform.BYTE_ARRAY_OFFSET, sizeInBytes); -// return rowCopy; - throw new IllegalArgumentException("NOT IMPLEMENTED YET"); - } - - @Override - public int hashCode() { - return Murmur3_x86_32.hashUnsafeWords(null, address, sizeInBytes, 42); - } - - @Override - public boolean equals(Object other) { - if (other instanceof CudfUnsafeRow) { - CudfUnsafeRow o = (CudfUnsafeRow) other; - return (sizeInBytes == o.sizeInBytes) && - ByteArrayMethods.arrayEquals(null, address, null, o.address, sizeInBytes) && - Arrays.equals(remapping, o.remapping); - } - return false; - } - - // This is for debugging - @Override - public String toString() { - StringBuilder build = new StringBuilder("["); - for (int i = 0; i < sizeInBytes; i += 8) { - if (i != 0) build.append(','); - build.append(java.lang.Long.toHexString(Platform.getLong(null, address + i))); - } - build.append(']'); - build.append(" remapped with "); - build.append(Arrays.toString(remapping)); - return build.toString(); - } - - @Override - public boolean anyNull() { - throw new IllegalArgumentException("NOT IMPLEMENTED YET"); -// return BitSetMethods.anySet(baseObject, address, bitSetWidthInBytes / 8); - } -} \ No newline at end of file diff --git a/sql-plugin/src/main/java/com/nvidia/spark/rapids/InternalRowToColumnarBatchIterator.java b/sql-plugin/src/main/java/com/nvidia/spark/rapids/InternalRowToColumnarBatchIterator.java index 9e532ba394a..0aa3f0978e9 100644 --- a/sql-plugin/src/main/java/com/nvidia/spark/rapids/InternalRowToColumnarBatchIterator.java +++ b/sql-plugin/src/main/java/com/nvidia/spark/rapids/InternalRowToColumnarBatchIterator.java @@ -35,6 +35,7 @@ import ai.rapids.cudf.NvtxRange; import ai.rapids.cudf.Table; import com.nvidia.spark.rapids.jni.RowConversion; +import com.nvidia.spark.rapids.shims.CudfUnsafeRow; import org.apache.spark.TaskContext; import org.apache.spark.sql.catalyst.InternalRow; diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuColumnarToRowExec.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuColumnarToRowExec.scala index 38b235a36f6..694d6dabbd6 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuColumnarToRowExec.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuColumnarToRowExec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
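// Sketch (not part of the patch): the CudfUnsafeRow removed above is replaced by
// com.nvidia.spark.rapids.shims.CudfUnsafeRow, which the import hunks below pick up.
// The fixed-width row layout the removed class documents is restated here in Scala for
// reference; the shim implementation may differ in detail, and `fieldWidths` is a
// hypothetical stand-in for the per-column byte widths derived from the schema.
object CudfRowLayoutSketch {
  // Round `offset` up to the next multiple of `alignment` (alignment is a power of two).
  def alignOffset(offset: Int, alignment: Int): Int =
    (offset + alignment - 1) & -alignment

  // Columns are packed at their own alignment, validity follows at one byte per
  // eight fields, and the whole row is padded to a multiple of 8 bytes.
  def rowSizeEstimate(fieldWidths: Seq[Int]): Int = {
    val dataEnd = fieldWidths.foldLeft(0)((offset, width) => alignOffset(offset, width) + width)
    val validityBytes = (fieldWidths.length + 7) / 8
    alignOffset(dataEnd + validityBytes, 8)
  }
}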
@@ -25,7 +25,7 @@ import com.nvidia.spark.rapids.RapidsPluginImplicits._ import com.nvidia.spark.rapids.RmmRapidsRetryIterator.{splitSpillableInHalfByRows, withRetryNoSplit} import com.nvidia.spark.rapids.ScalableTaskCompletion.onTaskCompletion import com.nvidia.spark.rapids.jni.RowConversion -import com.nvidia.spark.rapids.shims.ShimUnaryExecNode +import com.nvidia.spark.rapids.shims.{CudfUnsafeRow, ShimUnaryExecNode} import org.apache.spark.TaskContext import org.apache.spark.rdd.RDD diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuDataWritingCommandExec.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuDataWritingCommandExec.scala index 5a54d0b2f66..019f9b2e6b0 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuDataWritingCommandExec.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuDataWritingCommandExec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,7 +31,7 @@ import org.apache.spark.sql.execution.command.DataWritingCommand import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.rapids.GpuWriteJobStatsTracker -import org.apache.spark.sql.rapids.execution.TrampolineUtil +import org.apache.spark.sql.rapids.shims.RapidsErrorUtils import org.apache.spark.sql.vectorized.ColumnarBatch import org.apache.spark.util.SerializableConfiguration @@ -84,10 +84,9 @@ object GpuDataWritingCommand { if (fs.exists(filePath) && fs.getFileStatus(filePath).isDirectory && fs.listStatus(filePath).length != 0) { - TrampolineUtil.throwAnalysisException( - s"CREATE-TABLE-AS-SELECT cannot create table with location to a non-empty directory " + - s"${tablePath} . To allow overwriting the existing non-empty directory, " + - s"set '$allowNonEmptyLocationInCTASKey' to true.") + throw RapidsErrorUtils. 
+ createTableAsSelectWithNonEmptyDirectoryError(tablePath.toString, + allowNonEmptyLocationInCTASKey) } } } diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala index 1a799b43d1b..ee66b4d19df 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala @@ -3797,14 +3797,6 @@ object GpuOverrides extends Logging { TypeSig.ARRAY.nested(TypeSig.all)), (e, conf, p, r) => new GpuGetArrayStructFieldsMeta(e, conf, p, r) ), - expr[RaiseError]( - "Throw an exception", - ExprChecks.unaryProject( - TypeSig.NULL, TypeSig.NULL, - TypeSig.STRING, TypeSig.STRING), - (a, conf, p, r) => new UnaryExprMeta[RaiseError](a, conf, p, r) { - override def convertToGpu(child: Expression): GpuExpression = GpuRaiseError(child) - }), expr[DynamicPruningExpression]( "Dynamic pruning expression marker", ExprChecks.unaryProject(TypeSig.all, TypeSig.all, TypeSig.BOOLEAN, TypeSig.BOOLEAN), @@ -3820,7 +3812,8 @@ object GpuOverrides extends Logging { val expressions: Map[Class[_ <: Expression], ExprRule[_ <: Expression]] = commonExpressions ++ TimeStamp.getExprs ++ GpuHiveOverrides.exprs ++ ZOrderRules.exprs ++ DecimalArithmeticOverrides.exprs ++ - BloomFilterShims.exprs ++ InSubqueryShims.exprs ++ SparkShimImpl.getExprs + BloomFilterShims.exprs ++ InSubqueryShims.exprs ++ RaiseErrorShim.exprs ++ + SparkShimImpl.getExprs def wrapScan[INPUT <: Scan]( scan: INPUT, diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuParquetFileFormat.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuParquetFileFormat.scala index e8ae977b1f6..25105386b3d 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuParquetFileFormat.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuParquetFileFormat.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
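// The GpuParquetWriter hunk just below switches from new ColumnarBatch(cols) to
// new ColumnarBatch(cols, batch.numRows()). A minimal sketch of why, using only
// Spark's public ColumnarBatch API (`cols` and `numRows` are hypothetical inputs,
// not names from the plugin): the single-argument constructor leaves the row count
// at 0 until setNumRows is called, so a batch rebuilt from transformed columns must
// carry the count across explicitly.
import org.apache.spark.sql.vectorized.{ColumnVector, ColumnarBatch}

object RebatchSketch {
  def rebuild(cols: Array[ColumnVector], numRows: Int): ColumnarBatch =
    new ColumnarBatch(cols, numRows) // keeps the row count even when no column is consulted
}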
@@ -321,7 +321,7 @@ class GpuParquetWriter( new GpuColumnVector(cv.dataType, deepTransformColumn(cv.getBase, cv.dataType)) .asInstanceOf[org.apache.spark.sql.vectorized.ColumnVector] } - new ColumnarBatch(transformedCols) + new ColumnarBatch(transformedCols, batch.numRows()) } } diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuRowToColumnarExec.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuRowToColumnarExec.scala index 99f17cf341a..51b6645d7b7 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuRowToColumnarExec.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuRowToColumnarExec.scala @@ -19,7 +19,7 @@ package com.nvidia.spark.rapids import ai.rapids.cudf.{NvtxColor, NvtxRange} import com.nvidia.spark.rapids.Arm.withResource import com.nvidia.spark.rapids.GpuColumnVector.GpuColumnarBatchBuilder -import com.nvidia.spark.rapids.shims.{GpuTypeShims, ShimUnaryExecNode} +import com.nvidia.spark.rapids.shims.{CudfUnsafeRow, GpuTypeShims, ShimUnaryExecNode} import org.apache.spark.TaskContext import org.apache.spark.broadcast.Broadcast diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuRunnableCommandExec.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuRunnableCommandExec.scala index e3869960fc4..43bd593c0b5 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuRunnableCommandExec.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuRunnableCommandExec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,7 +31,7 @@ import org.apache.spark.sql.execution.command.RunnableCommand import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.rapids.GpuWriteJobStatsTracker -import org.apache.spark.sql.rapids.execution.TrampolineUtil +import org.apache.spark.sql.rapids.shims.RapidsErrorUtils import org.apache.spark.sql.vectorized.ColumnarBatch import org.apache.spark.util.SerializableConfiguration @@ -82,10 +82,9 @@ object GpuRunnableCommand { if (fs.exists(filePath) && fs.getFileStatus(filePath).isDirectory && fs.listStatus(filePath).length != 0) { - TrampolineUtil.throwAnalysisException( - s"CREATE-TABLE-AS-SELECT cannot create table with location to a non-empty directory " + - s"${tablePath} . To allow overwriting the existing non-empty directory, " + - s"set '$allowNonEmptyLocationInCTASKey' to true.") + throw RapidsErrorUtils. 
+ createTableAsSelectWithNonEmptyDirectoryError(tablePath.toString, + allowNonEmptyLocationInCTASKey) } } } diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RegexParser.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RegexParser.scala index 45d5e07dd73..1ca155f8a52 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RegexParser.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RegexParser.scala @@ -17,7 +17,6 @@ package com.nvidia.spark.rapids import java.sql.SQLException -import scala.collection import scala.collection.mutable.ListBuffer import com.nvidia.spark.rapids.GpuOverrides.regexMetaChars @@ -73,7 +72,7 @@ class RegexParser(pattern: String) { sequence } - def parseReplacementBase(): RegexAST = { + private def parseReplacementBase(): RegexAST = { consume() match { case '\\' => parseBackrefOrEscaped() @@ -782,6 +781,7 @@ class CudfRegexTranspiler(mode: RegexMode) { } } + @scala.annotation.tailrec private def isRepetition(e: RegexAST, checkZeroLength: Boolean): Boolean = { e match { case RegexRepetition(_, _) if !checkZeroLength => true @@ -1648,6 +1648,7 @@ class CudfRegexTranspiler(mode: RegexMode) { } } + @scala.annotation.tailrec private def isEntirely(regex: RegexAST, f: RegexAST => Boolean): Boolean = { regex match { case RegexSequence(parts) if parts.nonEmpty => @@ -1672,6 +1673,7 @@ class CudfRegexTranspiler(mode: RegexMode) { }) } + @scala.annotation.tailrec private def beginsWith(regex: RegexAST, f: RegexAST => Boolean): Boolean = { regex match { case RegexSequence(parts) if parts.nonEmpty => @@ -1687,6 +1689,7 @@ class CudfRegexTranspiler(mode: RegexMode) { } + @scala.annotation.tailrec private def endsWith(regex: RegexAST, f: RegexAST => Boolean): Boolean = { regex match { case RegexSequence(parts) if parts.nonEmpty => @@ -1760,7 +1763,7 @@ sealed case class RegexSequence(parts: ListBuffer[RegexAST]) extends RegexAST { } sealed case class RegexGroup(capture: Boolean, term: RegexAST, - val lookahead: Option[RegexLookahead]) + lookahead: Option[RegexLookahead]) extends RegexAST { def this(capture: Boolean, term: RegexAST) = { this(capture, term, None) @@ -2023,11 +2026,13 @@ object RegexOptimizationType { case class Contains(literal: String) extends RegexOptimizationType case class PrefixRange(literal: String, length: Int, rangeStart: Int, rangeEnd: Int) extends RegexOptimizationType + case class MultipleContains(literals: Seq[String]) extends RegexOptimizationType case object NoOptimization extends RegexOptimizationType } object RegexRewrite { + @scala.annotation.tailrec private def removeBrackets(astLs: collection.Seq[RegexAST]): collection.Seq[RegexAST] = { astLs match { case collection.Seq(RegexGroup(_, term, None)) => removeBrackets(term.children()) @@ -2044,7 +2049,7 @@ object RegexRewrite { */ private def getPrefixRangePattern(astLs: collection.Seq[RegexAST]): Option[(String, Int, Int, Int)] = { - val haveLiteralPrefix = isliteralString(astLs.dropRight(1)) + val haveLiteralPrefix = isLiteralString(astLs.dropRight(1)) val endsWithRange = astLs.lastOption match { case Some(RegexRepetition( RegexCharacterClass(false, ListBuffer(RegexCharacterRange(a,b))), @@ -2080,13 +2085,27 @@ object RegexRewrite { } } - private def isliteralString(astLs: collection.Seq[RegexAST]): Boolean = { + private def isLiteralString(astLs: collection.Seq[RegexAST]): Boolean = { removeBrackets(astLs).forall { - case RegexChar(ch) if !regexMetaChars.contains(ch) => true + case RegexChar(ch) => !regexMetaChars.contains(ch) case _ => false } } + private 
def getMultipleContainsLiterals(ast: RegexAST): Seq[String] = { + ast match { + case RegexGroup(_, term, _) => getMultipleContainsLiterals(term) + case RegexChoice(RegexSequence(parts), ls) if isLiteralString(parts) => { + getMultipleContainsLiterals(ls) match { + case Seq() => Seq.empty + case literals => RegexCharsToString(parts) +: literals + } + } + case RegexSequence(parts) if (isLiteralString(parts)) => Seq(RegexCharsToString(parts)) + case _ => Seq.empty + } + } + private def isWildcard(ast: RegexAST): Boolean = { ast match { case RegexRepetition(RegexChar('.'), SimpleQuantifier('*')) => true @@ -2097,11 +2116,8 @@ object RegexRewrite { } private def stripLeadingWildcards(astLs: collection.Seq[RegexAST]): - collection.Seq[RegexAST] = astLs match { - case (RegexChar('^') | RegexEscaped('A')) :: tail => - // if the pattern starts with ^ or \A, strip it too - tail.dropWhile(isWildcard) - case _ => astLs.dropWhile(isWildcard) + collection.Seq[RegexAST] = { + astLs.dropWhile(isWildcard) } private def stripTailingWildcards(astLs: collection.Seq[RegexAST]): @@ -2120,30 +2136,48 @@ object RegexRewrite { * Matches the given regex ast to a regex optimization type for regex rewrite * optimization. * - * @param ast The Abstract Syntax Tree parsed from a regex pattern. + * @param ast Abstract Syntax Tree parsed from a regex pattern. * @return The `RegexOptimizationType` for the given pattern. */ def matchSimplePattern(ast: RegexAST): RegexOptimizationType = { - ast.children() match { - case (RegexChar('^') | RegexEscaped('A')) :: ast - if isliteralString(stripTailingWildcards(ast)) => { - // ^literal.* => startsWith literal - RegexOptimizationType.StartsWith(RegexCharsToString(stripTailingWildcards(ast))) + val astLs = ast match { + case RegexSequence(_) => ast.children() + case _ => Seq(ast) + } + val noTailingWildcards = stripTailingWildcards(astLs) + if (noTailingWildcards.headOption.exists( + ast => ast == RegexChar('^') || ast == RegexEscaped('A'))) { + val possibleLiteral = noTailingWildcards.drop(1) + if (isLiteralString(possibleLiteral)) { + return RegexOptimizationType.StartsWith(RegexCharsToString(possibleLiteral)) } - case astLs => { - val noStartsWithAst = stripTailingWildcards(stripLeadingWildcards(astLs)) - val prefixRangeInfo = getPrefixRangePattern(noStartsWithAst) - if (prefixRangeInfo.isDefined) { - val (prefix, length, start, end) = prefixRangeInfo.get - // (literal[a-b]{x,y}) => prefix range pattern - RegexOptimizationType.PrefixRange(prefix, length, start, end) - } else if (isliteralString(noStartsWithAst)) { - // literal.* or (literal).* => contains literal - RegexOptimizationType.Contains(RegexCharsToString(noStartsWithAst)) - } else { - RegexOptimizationType.NoOptimization - } + } + + val noStartsWithAst = stripLeadingWildcards(noTailingWildcards) + + // Check if the pattern is a contains literal pattern + if (isLiteralString(noStartsWithAst)) { + // literal or .*(literal).* => contains literal + return RegexOptimizationType.Contains(RegexCharsToString(noStartsWithAst)) + } + + // Check if the pattern is a multiple contains literal pattern (e.g. "abc|def|ghi") + if (noStartsWithAst.length == 1) { + val containsLiterals = getMultipleContainsLiterals(noStartsWithAst.head) + if (!containsLiterals.isEmpty) { + return RegexOptimizationType.MultipleContains(containsLiterals) } } + + // Check if the pattern is a prefix range pattern (e.g. 
"abc[a-z]{3}") + val prefixRangeInfo = getPrefixRangePattern(noStartsWithAst) + if (prefixRangeInfo.isDefined) { + val (prefix, length, start, end) = prefixRangeInfo.get + // (literal[a-b]{x,y}) => prefix range pattern + return RegexOptimizationType.PrefixRange(prefix, length, start, end) + } + + // Return NoOptimization if the pattern is not a simple pattern, so cuDF regex is used instead + RegexOptimizationType.NoOptimization } -} \ No newline at end of file +} diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/GpuHiveTextFileFormat.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/GpuHiveFileFormat.scala similarity index 54% rename from sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/GpuHiveTextFileFormat.scala rename to sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/GpuHiveFileFormat.scala index 4595ea87ed3..21437a64481 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/GpuHiveTextFileFormat.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/GpuHiveFileFormat.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,8 +17,9 @@ package org.apache.spark.sql.hive.rapids import java.nio.charset.Charset +import java.util.Locale -import ai.rapids.cudf.{CSVWriterOptions, DType, QuoteStyle, Scalar, Table, TableWriter => CudfTableWriter} +import ai.rapids.cudf.{CompressionType, CSVWriterOptions, DType, ParquetWriterOptions, QuoteStyle, Scalar, Table, TableWriter => CudfTableWriter} import com.google.common.base.Charsets import com.nvidia.spark.rapids._ import com.nvidia.spark.rapids.Arm.withResource @@ -27,14 +28,85 @@ import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext} import org.apache.spark.internal.Logging import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.hive.rapids.GpuHiveTextFileUtils._ +import org.apache.spark.sql.execution.datasources.parquet.ParquetOptions import org.apache.spark.sql.hive.rapids.shims.GpuInsertIntoHiveTableMeta -import org.apache.spark.sql.types.{DataType, StringType, StructType} +import org.apache.spark.sql.rapids.execution.TrampolineUtil +import org.apache.spark.sql.types.{DataType, Decimal, DecimalType, StringType, StructType} import org.apache.spark.sql.vectorized.ColumnarBatch -object GpuHiveTextFileFormat extends Logging { +object GpuHiveFileFormat extends Logging { + private val parquetOutputFormatClass = + "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat" + private val parquetSerdeClass = + "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe" - private def checkIfEnabled(meta: GpuInsertIntoHiveTableMeta): Unit = { + def tagGpuSupport(meta: GpuInsertIntoHiveTableMeta): Option[ColumnarFileFormat] = { + val insertCmd = meta.wrapped + // Bucketing write + if (insertCmd.table.bucketSpec.isDefined) { + meta.willNotWorkOnGpu("bucketed tables are not supported yet") + } + + // Infer the file format from the serde string, similar to what Spark does in + // RelationConversions for Hive. 
+ val serde = insertCmd.table.storage.serde.getOrElse("").toLowerCase(Locale.ROOT) + val tempFileFormat = if (serde.contains("parquet")) { + // Parquet specific tagging + tagGpuSupportForParquet(meta) + } else { + // Default to text file format + tagGpuSupportForText(meta) + } + + if (meta.canThisBeReplaced) { + Some(tempFileFormat) + } else { + None + } + } + + private def tagGpuSupportForParquet(meta: GpuInsertIntoHiveTableMeta): ColumnarFileFormat = { + val insertCmd = meta.wrapped + val storage = insertCmd.table.storage + + if (storage.outputFormat.getOrElse("") != parquetOutputFormatClass) { + meta.willNotWorkOnGpu(s"unsupported output format found: ${storage.outputFormat}, " + + s"only $parquetOutputFormatClass is currently supported for Parquet") + } + if (storage.serde.getOrElse("") != parquetSerdeClass) { + meta.willNotWorkOnGpu(s"unsupported serde found: ${storage.serde}, " + + s"only $parquetSerdeClass is currently supported for Parquet") + } + + // Decimal type check + val hasIntOrLongBackedDec = insertCmd.query.schema.exists { field => + TrampolineUtil.dataTypeExistsRecursively(field.dataType, { + case dec: DecimalType if dec.precision <= Decimal.MAX_LONG_DIGITS => true + case _ => false + }) + } + if (hasIntOrLongBackedDec) { + meta.willNotWorkOnGpu("decimals that fit in a long are not supported " + + s"for Parquet. Hive always writes decimals as binary arrays but the GPU writes them " + + s"as integral types") + } + + FileFormatChecks.tag(meta, insertCmd.table.schema, ParquetFormatType, WriteFileOp) + + // Compression type + val parquetOptions = new ParquetOptions(insertCmd.table.properties, insertCmd.conf) + val compressionType = + GpuParquetFileFormat.parseCompressionType(parquetOptions.compressionCodecClassName) + .getOrElse { + meta.willNotWorkOnGpu("compression codec " + + s"${parquetOptions.compressionCodecClassName} is not supported for Parquet") + CompressionType.NONE + } + new GpuHiveParquetFileFormat(compressionType) + } + + private def tagGpuSupportForText(meta: GpuInsertIntoHiveTableMeta): ColumnarFileFormat = { + import org.apache.spark.sql.hive.rapids.GpuHiveTextFileUtils._ if (!meta.conf.isHiveDelimitedTextEnabled) { meta.willNotWorkOnGpu("Hive text I/O has been disabled. 
To enable this, " + s"set ${RapidsConf.ENABLE_HIVE_TEXT} to true") @@ -43,21 +115,16 @@ object GpuHiveTextFileFormat extends Logging { meta.willNotWorkOnGpu("writing Hive delimited text tables has been disabled, " + s"to enable this, set ${RapidsConf.ENABLE_HIVE_TEXT_WRITE} to true") } - } - - def tagGpuSupport(meta: GpuInsertIntoHiveTableMeta) - : Option[ColumnarFileFormat] = { - checkIfEnabled(meta) val insertCommand = meta.wrapped val storage = insertCommand.table.storage if (storage.outputFormat.getOrElse("") != textOutputFormat) { meta.willNotWorkOnGpu(s"unsupported output-format found: ${storage.outputFormat}, " + - s"only $textOutputFormat is currently supported") + s"only $textOutputFormat is currently supported for text") } if (storage.serde.getOrElse("") != lazySimpleSerDe) { meta.willNotWorkOnGpu(s"unsupported serde found: ${storage.serde}, " + - s"only $lazySimpleSerDe is currently supported") + s"only $lazySimpleSerDe is currently supported for text") } val serializationFormat = storage.properties.getOrElse(serializationKey, "1") @@ -86,28 +153,60 @@ object GpuHiveTextFileFormat extends Logging { meta.willNotWorkOnGpu("only UTF-8 is supported as the charset") } - if (insertCommand.table.bucketSpec.isDefined) { - meta.willNotWorkOnGpu("bucketed tables are not supported") - } - - if (insertCommand.conf.getConfString("hive.exec.compress.output", "false").toLowerCase - != "false") { + if (insertCommand.conf.getConfString("hive.exec.compress.output", "false").toBoolean) { meta.willNotWorkOnGpu("compressed output is not supported, " + "set hive.exec.compress.output to false to enable writing Hive text via GPU") } - FileFormatChecks.tag(meta, - insertCommand.table.schema, - HiveDelimitedTextFormatType, - WriteFileOp) + FileFormatChecks.tag(meta, insertCommand.table.schema, HiveDelimitedTextFormatType, + WriteFileOp) - Some(new GpuHiveTextFileFormat()) + new GpuHiveTextFileFormat() } } +class GpuHiveParquetFileFormat(compType: CompressionType) extends ColumnarFileFormat { + + override def prepareWrite(sparkSession: SparkSession, job: Job, + options: Map[String, String], dataSchema: StructType): ColumnarOutputWriterFactory = { + + // Avoid referencing the outer object. 
+ val compressionType = compType + new ColumnarOutputWriterFactory { + override def getFileExtension(context: TaskAttemptContext): String = + compressionType match { + case CompressionType.NONE => ".parquet" + case ct => s".${ct.name().toLowerCase(Locale.ROOT)}.parquet" + } + + override def newInstance(path: String, + dataSchema: StructType, + context: TaskAttemptContext): ColumnarOutputWriter = { + new GpuHiveParquetWriter(path, dataSchema, context, compressionType) + } + } + } +} + +class GpuHiveParquetWriter(override val path: String, dataSchema: StructType, + context: TaskAttemptContext, compType: CompressionType) + extends ColumnarOutputWriter(context, dataSchema, "HiveParquet", true) { + + override protected val tableWriter: CudfTableWriter = { + val optionsBuilder = SchemaUtils + .writerOptionsFromSchema(ParquetWriterOptions.builder(), dataSchema, + writeInt96 = true, // Hive 1.2 write timestamp as INT96 + parquetFieldIdEnabled = false) + .withCompressionType(compType) + Table.writeParquetChunked(optionsBuilder.build(), this) + } + +} + class GpuHiveTextFileFormat extends ColumnarFileFormat with Logging { - override def supportDataType(dataType: DataType): Boolean = isSupportedType(dataType) + override def supportDataType(dataType: DataType): Boolean = + GpuHiveTextFileUtils.isSupportedType(dataType) override def prepareWrite(sparkSession: SparkSession, job: Job, diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/RapidsHiveErrors.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/RapidsHiveErrors.scala index 259a04ec318..40cac90680f 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/RapidsHiveErrors.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/RapidsHiveErrors.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,9 +19,9 @@ package org.apache.spark.sql.hive.rapids import org.apache.hadoop.fs.Path import org.apache.spark.SparkException -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.expressions.Literal +import org.apache.spark.sql.rapids.shims.RapidsErrorUtils import org.apache.spark.sql.types.{DataType, DoubleType, FloatType, StringType} object RapidsHiveErrors { @@ -53,8 +53,7 @@ object RapidsHiveErrors { } def cannotResolveAttributeError(name: String, outputStr: String): Throwable = { - new AnalysisException( - s"Unable to resolve $name given [$outputStr]") + throw RapidsErrorUtils.cannotResolveAttributeError(name, outputStr) } def writePartitionExceedConfigSizeWhenDynamicPartitionError( diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuDataSourceBase.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuDataSourceBase.scala index 0ec720733e8..5589bca0435 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuDataSourceBase.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuDataSourceBase.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
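// The GpuDataSourceBase hunks below (like the RapidsHiveErrors change above) stop
// constructing AnalysisException inline and delegate to RapidsErrorUtils. A hedged
// sketch of that indirection; ErrorBuilderSketch, SketchErrorUtils and the message
// text are illustrative assumptions, not the plugin's real shim API.
trait ErrorBuilderSketch {
  def dataPathNotExistError(path: String): Throwable
}

object SketchErrorUtils extends ErrorBuilderSketch {
  // A real per-Spark-version shim would build the version-appropriate exception here.
  override def dataPathNotExistError(path: String): Throwable =
    new IllegalArgumentException(s"Path does not exist: $path")
}
// Call sites then read: throw SketchErrorUtils.dataPathNotExistError(path.toString)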
@@ -44,7 +44,7 @@ import org.apache.spark.sql.execution.datasources.v2.orc.OrcDataSourceV2 import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.sources.{RateStreamProvider, TextSocketSourceProvider} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.rapids.shims.SchemaUtilsShims +import org.apache.spark.sql.rapids.shims.{RapidsErrorUtils, SchemaUtilsShims} import org.apache.spark.sql.sources._ import org.apache.spark.sql.types.{DataType, StructType} import org.apache.spark.util.{HadoopFSUtils, ThreadUtils, Utils} @@ -144,8 +144,8 @@ abstract class GpuDataSourceBase( } inferredOpt }.getOrElse { - throw new AnalysisException(s"Failed to resolve the schema for $format for " + - s"the partition column: $partitionColumn. It must be specified manually.") + throw RapidsErrorUtils. + partitionColumnNotSpecifiedError(format.toString, partitionColumn) } } StructType(partitionFields) @@ -162,8 +162,7 @@ abstract class GpuDataSourceBase( caseInsensitiveOptions - "path", SparkShimImpl.filesFromFileIndex(tempFileIndex)) }.getOrElse { - throw new AnalysisException( - s"Unable to infer schema for $format. It must be specified manually.") + throw RapidsErrorUtils.dataSchemaNotSpecifiedError(format.toString) } // We just print a waring message if the data schema and partition schema have the duplicate @@ -201,17 +200,13 @@ abstract class GpuDataSourceBase( case (dataSource: RelationProvider, None) => dataSource.createRelation(sparkSession.sqlContext, caseInsensitiveOptions) case (_: SchemaRelationProvider, None) => - throw new AnalysisException(s"A schema needs to be specified when using $className.") + throw RapidsErrorUtils.schemaNotSpecifiedForSchemaRelationProviderError(className) case (dataSource: RelationProvider, Some(schema)) => val baseRelation = dataSource.createRelation(sparkSession.sqlContext, caseInsensitiveOptions) if (!DataType.equalsIgnoreCompatibleNullability(baseRelation.schema, schema)) { - throw new AnalysisException( - "The user-specified schema doesn't match the actual schema: " + - s"user-specified: ${schema.toDDL}, actual: ${baseRelation.schema.toDDL}. If " + - "you're using DataFrameReader.schema API or creating a table, please do not " + - "specify the schema. Or if you're scanning an existed table, please drop " + - "it and re-create it.") + throw RapidsErrorUtils.userSpecifiedSchemaMismatchActualSchemaError(schema, + baseRelation.schema) } baseRelation @@ -233,9 +228,8 @@ abstract class GpuDataSourceBase( caseInsensitiveOptions - "path", SparkShimImpl.filesFromFileIndex(fileCatalog)) }.getOrElse { - throw new AnalysisException( - s"Unable to infer schema for $format at ${fileCatalog.allFiles().mkString(",")}. " + - "It must be specified manually") + throw RapidsErrorUtils. + dataSchemaNotSpecifiedError(format.toString, fileCatalog.allFiles().mkString(",")) } HadoopFsRelation( @@ -276,8 +270,7 @@ abstract class GpuDataSourceBase( caseInsensitiveOptions)(sparkSession) case _ => - throw new AnalysisException( - s"$className is not a valid Spark SQL Data Source.") + throw RapidsErrorUtils.invalidDataSourceError(className) } relation match { @@ -411,22 +404,13 @@ object GpuDataSourceBase extends Logging { dataSource case Failure(error) => if (provider1.startsWith("org.apache.spark.sql.hive.orc")) { - throw new AnalysisException( - "Hive built-in ORC data source must be used with Hive support enabled. 
" + - "Please use the native ORC data source by setting 'spark.sql.orc.impl' to " + - "'native'") + throw RapidsErrorUtils.orcNotUsedWithHiveEnabledError() } else if (provider1.toLowerCase(Locale.ROOT) == "avro" || provider1 == "com.databricks.spark.avro" || provider1 == "org.apache.spark.sql.avro") { - throw new AnalysisException( - s"Failed to find data source: $provider1. Avro is built-in but external data " + - "source module since Spark 2.4. Please deploy the application as per " + - "the deployment section of \"Apache Avro Data Source Guide\".") + throw RapidsErrorUtils.failedToFindAvroDataSourceError(provider1) } else if (provider1.toLowerCase(Locale.ROOT) == "kafka") { - throw new AnalysisException( - s"Failed to find data source: $provider1. Please deploy the application as " + - "per the deployment section of " + - "\"Structured Streaming + Kafka Integration Guide\".") + throw RapidsErrorUtils.failedToFindKafkaDataSourceError(provider1) } else { throw new ClassNotFoundException( s"Failed to find data source: $provider1. Please find packages at " + @@ -459,8 +443,7 @@ object GpuDataSourceBase extends Logging { s"defaulting to the internal datasource (${internalSources.head.getClass.getName}).") internalSources.head.getClass } else { - throw new AnalysisException(s"Multiple sources found for $provider1 " + - s"(${sourceNames.mkString(", ")}), please specify the fully qualified class name.") + throw RapidsErrorUtils.findMultipleDataSourceError(provider1, sourceNames) } } } catch { @@ -513,7 +496,7 @@ object GpuDataSourceBase extends Logging { } if (checkEmptyGlobPath && globResult.isEmpty) { - throw new AnalysisException(s"Path does not exist: $globPath") + throw RapidsErrorUtils.dataPathNotExistError(globPath.toString) } globResult @@ -527,7 +510,7 @@ object GpuDataSourceBase extends Logging { ThreadUtils.parmap(nonGlobPaths, "checkPathsExist", numThreads) { path => val fs = path.getFileSystem(hadoopConf) if (!fs.exists(path)) { - throw new AnalysisException(s"Path does not exist: $path") + throw RapidsErrorUtils.dataPathNotExistError(path.toString) } } } catch { diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuInsertIntoHadoopFsRelationCommand.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuInsertIntoHadoopFsRelationCommand.scala index 2b7974fd1a6..ece5ef5acf5 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuInsertIntoHadoopFsRelationCommand.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuInsertIntoHadoopFsRelationCommand.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,7 +22,7 @@ import com.nvidia.spark.rapids.{ColumnarFileFormat, GpuDataWritingCommand} import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.internal.io.FileCommitProtocol -import org.apache.spark.sql.{AnalysisException, SaveMode, SparkSession} +import org.apache.spark.sql.{SaveMode, SparkSession} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTable, CatalogTablePartition} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils.getPartitionPathString @@ -33,7 +33,7 @@ import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.command.{AlterTableAddPartitionCommand, AlterTableDropPartitionCommand, CommandUtils} import org.apache.spark.sql.execution.datasources.{FileFormatWriter, FileIndex, PartitioningUtils} import org.apache.spark.sql.internal.SQLConf.PartitionOverwriteMode -import org.apache.spark.sql.rapids.shims.SchemaUtilsShims +import org.apache.spark.sql.rapids.shims.{RapidsErrorUtils, SchemaUtilsShims} import org.apache.spark.sql.vectorized.ColumnarBatch case class GpuInsertIntoHadoopFsRelationCommand( @@ -121,7 +121,7 @@ case class GpuInsertIntoHadoopFsRelationCommand( val pathExists = fs.exists(qualifiedOutputPath) (mode, pathExists) match { case (SaveMode.ErrorIfExists, true) => - throw new AnalysisException(s"path $qualifiedOutputPath already exists.") + throw RapidsErrorUtils.outputPathAlreadyExistsError(qualifiedOutputPath) case (SaveMode.Overwrite, true) => if (ifPartitionNotExists && matchingPartitions.nonEmpty) { false diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/catalyst/expressions/GpuRandomExpressions.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/catalyst/expressions/GpuRandomExpressions.scala index 6675f678f6d..f9d0be81505 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/catalyst/expressions/GpuRandomExpressions.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/catalyst/expressions/GpuRandomExpressions.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,8 +23,8 @@ import com.nvidia.spark.rapids.Arm.withResource import com.nvidia.spark.rapids.shims.ShimUnaryExpression import org.apache.spark.TaskContext -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, ExpressionWithRandomSeed} +import org.apache.spark.sql.rapids.execution.RapidsAnalysisException import org.apache.spark.sql.types._ import org.apache.spark.sql.vectorized.ColumnarBatch import org.apache.spark.util.Utils @@ -52,7 +52,7 @@ case class GpuRand(child: Expression) extends ShimUnaryExpression with GpuExpres @transient protected lazy val seed: Long = child match { case GpuLiteral(s, IntegerType) => s.asInstanceOf[Int] case GpuLiteral(s, LongType) => s.asInstanceOf[Long] - case _ => throw new AnalysisException( + case _ => throw new RapidsAnalysisException( s"Input argument to $prettyName must be an integer, long or null literal.") } diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/collectionOperations.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/collectionOperations.scala index 7f0a82517c3..41c2e5e3776 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/collectionOperations.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/collectionOperations.scala @@ -49,8 +49,8 @@ case class GpuConcat(children: Seq[Expression]) extends GpuComplexTypeMergingExp override def columnarEval(batch: ColumnarBatch): GpuColumnVector = { val res = dataType match { - // Explicitly return null for empty concat as Spark, since cuDF doesn't support empty concat. - case dt if children.isEmpty => GpuScalar.from(null, dt) + // in Spark concat() will be considered as an empty string here + case dt if children.isEmpty => GpuScalar("", dt) // For single column concat, we pass the result of child node to avoid extra cuDF call. case _ if children.length == 1 => children.head.columnarEval(batch) case StringType => stringConcat(batch) diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/TrampolineUtil.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/TrampolineUtil.scala index 5ffe08348f1..8a88cc4024d 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/TrampolineUtil.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/TrampolineUtil.scala @@ -157,9 +157,6 @@ object TrampolineUtil { TaskContext.get.taskMemoryManager() } - /** Throw a Spark analysis exception */ - def throwAnalysisException(msg: String) = throw new AnalysisException(msg) - /** Set the task context for the current thread */ def setTaskContext(tc: TaskContext): Unit = TaskContext.setTaskContext(tc) @@ -241,4 +238,13 @@ object TrampolineUtil { } def getSparkHadoopUtilConf: Configuration = SparkHadoopUtil.get.conf + } + +/** + * This class is to only be used to throw errors specific to the + * RAPIDS Accelerator or errors mirroring Spark where a raw + * AnalysisException is thrown directly rather than via an error + * utility class (this should be rare). 
+ */ +class RapidsAnalysisException(msg: String) extends AnalysisException(msg) diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuAggregateInPandasExec.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuAggregateInPandasExec.scala index bc2f30dff2f..639a39bcd38 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuAggregateInPandasExec.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuAggregateInPandasExec.scala @@ -75,15 +75,15 @@ case class GpuAggregateInPandasExec( } private def collectFunctions(udf: GpuPythonFunction): - (ChainedPythonFunctions, Seq[Expression]) = { + ((ChainedPythonFunctions, Long), Seq[Expression]) = { udf.children match { case Seq(u: GpuPythonFunction) => - val (chained, children) = collectFunctions(u) - (ChainedPythonFunctions(chained.funcs ++ Seq(udf.func)), children) + val ((chained, _), children) = collectFunctions(u) + ((ChainedPythonFunctions(chained.funcs ++ Seq(udf.func)), udf.resultId.id), children) case children => // There should not be any other UDFs, or the children can't be evaluated directly. assert(children.forall(_.find(_.isInstanceOf[GpuPythonFunction]).isEmpty)) - (ChainedPythonFunctions(Seq(udf.func)), udf.children) + ((ChainedPythonFunctions(Seq(udf.func)), udf.resultId.id), udf.children) } } diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuArrowEvalPythonExec.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuArrowEvalPythonExec.scala index 182d7d1b6c6..c99d0403ed0 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuArrowEvalPythonExec.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuArrowEvalPythonExec.scala @@ -362,15 +362,16 @@ case class GpuArrowEvalPythonExec( override def producedAttributes: AttributeSet = AttributeSet(resultAttrs) - private def collectFunctions(udf: GpuPythonUDF): (ChainedPythonFunctions, Seq[Expression]) = { + private def collectFunctions( + udf: GpuPythonUDF): ((ChainedPythonFunctions, Long), Seq[Expression]) = { udf.children match { case Seq(u: GpuPythonUDF) => - val (chained, children) = collectFunctions(u) - (ChainedPythonFunctions(chained.funcs ++ Seq(udf.func)), children) + val ((chained, _), children) = collectFunctions(u) + ((ChainedPythonFunctions(chained.funcs ++ Seq(udf.func)), udf.resultId.id), children) case children => // There should not be any other UDFs, or the children can't be evaluated directly. assert(children.forall(_.find(_.isInstanceOf[GpuPythonUDF]).isEmpty)) - (ChainedPythonFunctions(Seq(udf.func)), udf.children) + ((ChainedPythonFunctions(Seq(udf.func)), udf.resultId.id), udf.children) } } diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuFlatMapCoGroupsInPandasExec.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuFlatMapCoGroupsInPandasExec.scala index b8fa3c1ab69..2e90765e40e 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuFlatMapCoGroupsInPandasExec.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuFlatMapCoGroupsInPandasExec.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -107,8 +107,8 @@ case class GpuFlatMapCoGroupsInPandasExec( private val sessionLocalTimeZone = conf.sessionLocalTimeZone private val pythonRunnerConf = ArrowUtilsShim.getPythonRunnerConfMap(conf) - private val pandasFunction = udf.asInstanceOf[GpuPythonUDF].func - private val chainedFunc = Seq(ChainedPythonFunctions(Seq(pandasFunction))) + private val pyUDF = udf.asInstanceOf[GpuPythonUDF] + private val chainedFunc = Seq((ChainedPythonFunctions(Seq(pyUDF.func)), pyUDF.resultId.id)) override def producedAttributes: AttributeSet = AttributeSet(output) diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuFlatMapGroupsInPandasExec.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuFlatMapGroupsInPandasExec.scala index 4a24a449b24..f1596ae7a74 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuFlatMapGroupsInPandasExec.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuFlatMapGroupsInPandasExec.scala @@ -98,7 +98,7 @@ case class GpuFlatMapGroupsInPandasExec( override def requiredChildOrdering: Seq[Seq[SortOrder]] = Seq(groupingAttributes.map(SortOrder(_, Ascending))) - private val pandasFunction = func.asInstanceOf[GpuPythonUDF].func + private val udf = func.asInstanceOf[GpuPythonUDF] // One batch as input to keep the integrity for each group override def childrenCoalesceGoal: Seq[CoalesceGoal] = Seq(RequireSingleBatch) @@ -111,7 +111,7 @@ case class GpuFlatMapGroupsInPandasExec( val (mNumInputRows, mNumInputBatches, mNumOutputRows, mNumOutputBatches) = commonGpuMetrics() lazy val isPythonOnGpuEnabled = GpuPythonHelper.isPythonOnGpuEnabled(conf) - val chainedFunc = Seq(ChainedPythonFunctions(Seq(pandasFunction))) + val chainedFunc = Seq((ChainedPythonFunctions(Seq(udf.func)), udf.resultId.id)) val localOutput = output val localChildOutput = child.output // Python wraps the resulting columns in a single struct column. 
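The Python exec changes above all follow one pattern: each ChainedPythonFunctions is now paired with the resultId.id of the UDF that owns it, so the runners can thread the result identifier through alongside the chained functions. A minimal sketch of that pairing is shown below, using simplified stand-in types; PyFunc, Chained and PyUDF are illustrative names, not the plugin's ChainedPythonFunctions or GpuPythonFunction classes.

// Illustrative sketch: mirrors the (ChainedPythonFunctions, Long) pairing
// introduced in collectFunctions, with simplified stand-in types.
object CollectFunctionsSketch {
  case class PyFunc(name: String)
  case class Chained(funcs: Seq[PyFunc])
  case class PyUDF(func: PyFunc, resultId: Long, children: Seq[Any])

  // Walk a possibly nested UDF and return its chained functions paired with
  // the outermost UDF's result id, plus the non-UDF children to evaluate.
  def collectFunctions(udf: PyUDF): ((Chained, Long), Seq[Any]) = udf.children match {
    case Seq(u: PyUDF) =>
      // Nested UDF: extend the chain but keep the result id of the outer UDF.
      val ((chained, _), grandChildren) = collectFunctions(u)
      ((Chained(chained.funcs :+ udf.func), udf.resultId), grandChildren)
    case children =>
      ((Chained(Seq(udf.func)), udf.resultId), children)
  }

  def main(args: Array[String]): Unit = {
    val inner = PyUDF(PyFunc("inner"), resultId = 1L, children = Seq("col_a"))
    val outer = PyUDF(PyFunc("outer"), resultId = 2L, children = Seq(inner))
    // Chain is built inner-to-outer and the id comes from the outermost UDF:
    // ((Chained(List(PyFunc(inner), PyFunc(outer))),2),List(col_a))
    println(collectFunctions(outer))
  }
}
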
diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuMapInBatchExec.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuMapInBatchExec.scala index 4d41cd32e4f..57c1c7f7114 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuMapInBatchExec.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuMapInBatchExec.scala @@ -46,7 +46,7 @@ trait GpuMapInBatchExec extends ShimUnaryExecNode with GpuPythonExecBase { protected val isBarrier: Boolean - private val pandasFunction = func.asInstanceOf[GpuPythonUDF].func + private val udf = func.asInstanceOf[GpuPythonUDF] override def producedAttributes: AttributeSet = AttributeSet(output) @@ -58,7 +58,7 @@ trait GpuMapInBatchExec extends ShimUnaryExecNode with GpuPythonExecBase { val (numInputRows, numInputBatches, numOutputRows, numOutputBatches) = commonGpuMetrics() val pyInputTypes = child.schema - val chainedFunc = Seq(ChainedPythonFunctions(Seq(pandasFunction))) + val chainedFunc = Seq((ChainedPythonFunctions(Seq(udf.func)), udf.resultId.id)) val sessionLocalTimeZone = conf.sessionLocalTimeZone val pythonRunnerConf = ArrowUtilsShim.getPythonRunnerConfMap(conf) val isPythonOnGpuEnabled = GpuPythonHelper.isPythonOnGpuEnabled(conf) diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuPythonHelper.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuPythonHelper.scala index 451ae401891..8564018ad3b 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuPythonHelper.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuPythonHelper.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -86,11 +86,12 @@ object GpuPythonHelper extends Logging { } // Called in each task at the executor side - def injectGpuInfo(funcs: Seq[ChainedPythonFunctions], isPythonOnGpuEnabled: Boolean): Unit = { + def injectGpuInfo(funcs: Seq[(ChainedPythonFunctions, Long)], + isPythonOnGpuEnabled: Boolean): Unit = { // Insert GPU related env(s) into `envVars` for all the PythonFunction(s). // Yes `PythonRunner` will only use the first one, but just make sure it will // take effect no matter the order changes or not. 
- funcs.foreach(_.funcs.foreach { pyF => + funcs.foreach(_._1.funcs.foreach { pyF => pyF.envVars.put("CUDA_VISIBLE_DEVICES", gpuId) pyF.envVars.put("RAPIDS_PYTHON_ENABLED", isPythonOnGpuEnabled.toString) pyF.envVars.put("RAPIDS_UVM_ENABLED", isPythonUvmEnabled) diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuPythonUDF.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuPythonUDF.scala index 6cb955a6db8..04367d9f29f 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuPythonUDF.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuPythonUDF.scala @@ -64,7 +64,7 @@ abstract class GpuPythonFunction( children: Seq[Expression], evalType: Int, udfDeterministic: Boolean, - resultId: ExprId = NamedExpression.newExprId) + val resultId: ExprId = NamedExpression.newExprId) extends Expression with GpuUnevaluable with NonSQLExpression with UserDefinedExpression with GpuAggregateWindowFunction with Serializable { @@ -94,7 +94,7 @@ case class GpuPythonUDF( children: Seq[Expression], evalType: Int, udfDeterministic: Boolean, - resultId: ExprId = NamedExpression.newExprId) + override val resultId: ExprId = NamedExpression.newExprId) extends GpuPythonFunction(name, func, dataType, children, evalType, udfDeterministic, resultId) { override lazy val canonicalized: Expression = { val canonicalizedChildren = children.map(_.canonicalized) @@ -110,7 +110,7 @@ case class GpuPythonUDAF( children: Seq[Expression], evalType: Int, udfDeterministic: Boolean, - resultId: ExprId = NamedExpression.newExprId) + override val resultId: ExprId = NamedExpression.newExprId) extends GpuPythonFunction(name, func, dataType, children, evalType, udfDeterministic, resultId) with GpuAggregateFunction { override lazy val canonicalized: Expression = { diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuWindowInPandasExecBase.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuWindowInPandasExecBase.scala index 3bc91cd6338..fcf9570a9f7 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuWindowInPandasExecBase.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/execution/python/GpuWindowInPandasExecBase.scala @@ -235,16 +235,16 @@ trait GpuWindowInPandasExecBase extends ShimUnaryExecNode with GpuPythonExecBase protected val windowBoundTypeConf = "pandas_window_bound_types" - protected def collectFunctions(udf: GpuPythonFunction): - (ChainedPythonFunctions, Seq[Expression]) = { + protected def collectFunctions( + udf: GpuPythonFunction): ((ChainedPythonFunctions, Long), Seq[Expression]) = { udf.children match { case Seq(u: GpuPythonFunction) => - val (chained, children) = collectFunctions(u) - (ChainedPythonFunctions(chained.funcs ++ Seq(udf.func)), children) + val ((chained, _), children) = collectFunctions(u) + ((ChainedPythonFunctions(chained.funcs ++ Seq(udf.func)), udf.resultId.id), children) case children => // There should not be any other UDFs, or the children can't be evaluated directly. 
assert(children.forall(_.find(_.isInstanceOf[GpuPythonFunction]).isEmpty)) - (ChainedPythonFunctions(Seq(udf.func)), udf.children) + ((ChainedPythonFunctions(Seq(udf.func)), udf.resultId.id), udf.children) } } @@ -396,7 +396,7 @@ trait GpuWindowInPandasExecBase extends ShimUnaryExecNode with GpuPythonExecBase } }.toArray val dataCVs = GpuColumnVector.extractColumns(batch) - new ColumnarBatch(boundsCVs ++ dataCVs.map(_.incRefCount()), numRows) + new ColumnarBatch((boundsCVs ++ dataCVs.map(_.incRefCount())).toArray, numRows) } override protected def internalDoExecuteColumnar(): RDD[ColumnarBatch] = { diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/stringFunctions.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/stringFunctions.scala index b875c84edbf..dc2845e4461 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/stringFunctions.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/stringFunctions.scala @@ -1097,6 +1097,7 @@ class GpuRLikeMeta( } case StartsWith(s) => GpuStartsWith(lhs, GpuLiteral(s, StringType)) case Contains(s) => GpuContains(lhs, GpuLiteral(s, StringType)) + case MultipleContains(ls) => GpuMultipleContains(lhs, ls) case PrefixRange(s, length, start, end) => GpuLiteralRangePattern(lhs, GpuLiteral(s, StringType), length, start, end) case _ => throw new IllegalStateException("Unexpected optimization type") @@ -1126,6 +1127,33 @@ case class GpuRLike(left: Expression, right: Expression, pattern: String) override def dataType: DataType = BooleanType } +case class GpuMultipleContains(input: Expression, searchList: Seq[String]) + extends GpuUnaryExpression with ImplicitCastInputTypes with NullIntolerant { + + override def dataType: DataType = BooleanType + + override def child: Expression = input + + override def inputTypes: Seq[AbstractDataType] = Seq(StringType) + + override def doColumnar(input: GpuColumnVector): ColumnVector = { + assert(searchList.length > 1) + val accInit = withResource(Scalar.fromString(searchList.head)) { searchScalar => + input.getBase.stringContains(searchScalar) + } + searchList.tail.foldLeft(accInit) { (acc, search) => + val containsSearch = withResource(Scalar.fromString(search)) { searchScalar => + input.getBase.stringContains(searchScalar) + } + withResource(acc) { _ => + withResource(containsSearch) { _ => + acc.or(containsSearch) + } + } + } + } +} + case class GpuLiteralRangePattern(left: Expression, right: Expression, length: Int, start: Int, end: Int) extends GpuBinaryExpressionArgsAnyScalar with ImplicitCastInputTypes with NullIntolerant { diff --git a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/CudfUnsafeRow.scala b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/CudfUnsafeRow.scala new file mode 100644 index 00000000000..c04d3b2db29 --- /dev/null +++ b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/CudfUnsafeRow.scala @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +/*** spark-rapids-shim-json-lines +{"spark": "311"} +{"spark": "312"} +{"spark": "313"} +{"spark": "320"} +{"spark": "321"} +{"spark": "321cdh"} +{"spark": "322"} +{"spark": "323"} +{"spark": "324"} +{"spark": "330"} +{"spark": "330cdh"} +{"spark": "330db"} +{"spark": "331"} +{"spark": "332"} +{"spark": "332cdh"} +{"spark": "332db"} +{"spark": "333"} +{"spark": "334"} +{"spark": "340"} +{"spark": "341"} +{"spark": "341db"} +{"spark": "342"} +{"spark": "343"} +{"spark": "350"} +{"spark": "351"} +spark-rapids-shim-json-lines ***/ +package com.nvidia.spark.rapids.shims + +import org.apache.spark.sql.catalyst.expressions.Attribute + +final class CudfUnsafeRow( + attributes: Array[Attribute], + remapping: Array[Int]) extends CudfUnsafeRowBase(attributes, remapping) + +object CudfUnsafeRow extends CudfUnsafeRowTrait \ No newline at end of file diff --git a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/CudfUnsafeRowBase.scala b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/CudfUnsafeRowBase.scala new file mode 100644 index 00000000000..e5e0bbd3dc6 --- /dev/null +++ b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/CudfUnsafeRowBase.scala @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/*** spark-rapids-shim-json-lines +{"spark": "311"} +{"spark": "312"} +{"spark": "313"} +{"spark": "320"} +{"spark": "321"} +{"spark": "321cdh"} +{"spark": "322"} +{"spark": "323"} +{"spark": "324"} +{"spark": "330"} +{"spark": "330cdh"} +{"spark": "330db"} +{"spark": "331"} +{"spark": "332"} +{"spark": "332cdh"} +{"spark": "332db"} +{"spark": "333"} +{"spark": "334"} +{"spark": "340"} +{"spark": "341"} +{"spark": "341db"} +{"spark": "342"} +{"spark": "343"} +{"spark": "350"} +{"spark": "351"} +{"spark": "400"} +spark-rapids-shim-json-lines ***/ +package com.nvidia.spark.rapids.shims + +import java.util.Arrays + +import com.nvidia.spark.rapids.GpuColumnVector + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.expressions.SpecializedGettersReader +import org.apache.spark.sql.catalyst.util.ArrayData +import org.apache.spark.sql.catalyst.util.MapData +import org.apache.spark.sql.types.DataType +import org.apache.spark.sql.types.Decimal +import org.apache.spark.unsafe.Platform +import org.apache.spark.unsafe.array.ByteArrayMethods +import org.apache.spark.unsafe.hash.Murmur3_x86_32 +import org.apache.spark.unsafe.types.CalendarInterval +import org.apache.spark.unsafe.types.UTF8String + +abstract class CudfUnsafeRowBase( + protected val attributes: Array[Attribute], + protected val remapping: Array[Int]) extends InternalRow { + protected var address: Long = _ + private var startOffsets: Array[Int] = _ + private var fixedWidthSizeInBytes: Int = _ + protected var sizeInBytes: Int = _ + + def this() = this(null, null) + + init(attributes, remapping) + + private def init(attributes: Array[Attribute], remapping: Array[Int]): Unit = { + var offset = 0 + startOffsets = new Array[Int](attributes.length) + for (i <- attributes.indices) { + val attr = attributes(i) + val length = GpuColumnVector.getNonNestedRapidsType(attr.dataType).getSizeInBytes + assert(length > 0, "Only fixed width types are currently supported.") + offset = CudfUnsafeRow.alignOffset(offset, length) + startOffsets(i) = offset + offset += length + } + fixedWidthSizeInBytes = offset + assert(startOffsets.length == remapping.length) + } + + override def numFields: Int = startOffsets.length + + def pointTo(address: Long, sizeInBytes: Int): Unit = { + assert(startOffsets != null && startOffsets.length > 0, "startOffsets not properly initialized") + assert(sizeInBytes % 8 == 0, s"sizeInBytes ($sizeInBytes) should be a multiple of 8") + this.address = address + this.sizeInBytes = sizeInBytes + } + + override def update(ordinal: Int, value: Any): Unit = throw new UnsupportedOperationException() + + override def get(ordinal: Int, dataType: DataType): Object = { + SpecializedGettersReader.read(this, ordinal, dataType, true, true) + } + + override def isNullAt(ordinal: Int): Boolean = { + val i = remapping(ordinal) + assertIndexIsValid(i) + val validByteIndex = i / 8 + val validBitIndex = i % 8 + val b = Platform.getByte(null, address + fixedWidthSizeInBytes + validByteIndex) + ((1 << validBitIndex) & b) == 0 + } + + override def setNullAt(ordinal: Int): Unit = { + val i = remapping(ordinal) + assertIndexIsValid(i) + val validByteIndex = i / 8 + val validBitIndex = i % 8 + var b = Platform.getByte(null, address + fixedWidthSizeInBytes + validByteIndex) + b = (b & ~(1 << validBitIndex)).toByte + Platform.putByte(null, address + fixedWidthSizeInBytes + validByteIndex, b) + } + + override def getBoolean(ordinal: Int): Boolean = { + 
Platform.getBoolean(null, getFieldAddressFromOrdinal(ordinal)) + } + + override def getByte(ordinal: Int): Byte = { + Platform.getByte(null, getFieldAddressFromOrdinal(ordinal)) + } + + override def getShort(ordinal: Int): Short = { + Platform.getShort(null, getFieldAddressFromOrdinal(ordinal)) + } + + override def getInt(ordinal: Int): Int = { + Platform.getInt(null, getFieldAddressFromOrdinal(ordinal)) + } + + override def getLong(ordinal: Int): Long = { + Platform.getLong(null, getFieldAddressFromOrdinal(ordinal)) + } + + override def getFloat(ordinal: Int): Float = { + Platform.getFloat(null, getFieldAddressFromOrdinal(ordinal)) + } + + override def getDouble(ordinal: Int): Double = { + Platform.getDouble(null, getFieldAddressFromOrdinal(ordinal)) + } + + override def getDecimal(ordinal: Int, precision: Int, scale: Int): Decimal = { + if (isNullAt(ordinal)) { + null + } else if (precision <= Decimal.MAX_INT_DIGITS) { + Decimal.createUnsafe(getInt(ordinal), precision, scale) + } else if (precision <= Decimal.MAX_LONG_DIGITS) { + Decimal.createUnsafe(getLong(ordinal), precision, scale) + } else { + throw new IllegalArgumentException("NOT IMPLEMENTED YET") + } + } + + override def getUTF8String(ordinal: Int): UTF8String = { + throw new IllegalArgumentException("NOT IMPLEMENTED YET") + } + + override def getBinary(ordinal: Int): Array[Byte] = { + throw new IllegalArgumentException("NOT IMPLEMENTED YET") + } + + override def getInterval(ordinal: Int): CalendarInterval = { + throw new IllegalArgumentException("NOT IMPLEMENTED YET") + } + + override def getStruct(ordinal: Int, numFields: Int): CudfUnsafeRow = { + throw new IllegalArgumentException("NOT IMPLEMENTED YET") + } + + override def getArray(ordinal: Int): ArrayData = { + throw new IllegalArgumentException("NOT IMPLEMENTED YET") + } + + override def getMap(ordinal: Int): MapData = { + throw new IllegalArgumentException("NOT IMPLEMENTED YET") + } + + override def copy(): CudfUnsafeRow = { + throw new IllegalArgumentException("NOT IMPLEMENTED YET") + } + + override def hashCode(): Int = { + Murmur3_x86_32.hashUnsafeWords(null, address, sizeInBytes, 42) + } + + override def equals(other: Any): Boolean = other match { + case o: CudfUnsafeRow => + sizeInBytes == o.sizeInBytes && + ByteArrayMethods.arrayEquals(null, address, null, o.address, sizeInBytes) && + Arrays.equals(this.remapping, o.remapping) + case _ => false + } + + override def toString: String = { + val build = new StringBuilder("[") + for (i <- 0 until sizeInBytes by 8) { + if (i != 0) build.append(',') + build.append(java.lang.Long.toHexString(Platform.getLong(null, address + i))) + } + build.append(']') + build.append(" remapped with ") + build.append(Arrays.toString(remapping)) + build.toString() + } + + override def anyNull(): Boolean = throw new IllegalArgumentException("NOT IMPLEMENTED YET") + + private def getFieldAddressFromOrdinal(ordinal: Int): Long = { + assertIndexIsValid(ordinal) + val i = remapping(ordinal) + address + startOffsets(i) + } + + private def assertIndexIsValid(index: Int): Unit = { + assert(index >= 0, s"index ($index) should >= 0") + assert(index < startOffsets.length, s"index ($index) should < ${startOffsets.length}") + } +} + +trait CudfUnsafeRowTrait { + def alignOffset(offset: Int, alignment: Int): Int = (offset + alignment - 1) & -alignment + + def calculateBitSetWidthInBytes(numFields: Int): Int = (numFields + 7) / 8 + + def getRowSizeEstimate(attributes: Array[Attribute]): Int = { + var offset = 0 + for (attr <- attributes) { + val length = 
GpuColumnVector.getNonNestedRapidsType(attr.dataType).getSizeInBytes + offset = alignOffset(offset, length) + offset += length + } + val bitSetWidthInBytes = calculateBitSetWidthInBytes(attributes.length) + alignOffset(offset + bitSetWidthInBytes, 8) + } +} \ No newline at end of file diff --git a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/ParquetSchemaClipShims.scala b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/ParquetSchemaClipShims.scala index fd48b8b6375..4d6d4967a80 100644 --- a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/ParquetSchemaClipShims.scala +++ b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/ParquetSchemaClipShims.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,7 +26,8 @@ import org.apache.parquet.schema.OriginalType._ import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName._ import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.rapids.execution.TrampolineUtil +import org.apache.spark.sql.rapids.execution.RapidsAnalysisException +import org.apache.spark.sql.rapids.shims.RapidsErrorUtils import org.apache.spark.sql.types._ object ParquetSchemaClipShims { @@ -64,13 +65,13 @@ object ParquetSchemaClipShims { if (originalType == null) s"$typeName" else s"$typeName ($originalType)" def typeNotSupported() = - TrampolineUtil.throwAnalysisException(s"Parquet type not supported: $typeString") + throw new RapidsAnalysisException(s"Parquet type not supported: $typeString") def typeNotImplemented() = - TrampolineUtil.throwAnalysisException(s"Parquet type not yet supported: $typeString") + throw RapidsErrorUtils.parquetTypeUnsupportedYetError(typeString) def illegalType() = - TrampolineUtil.throwAnalysisException(s"Illegal Parquet type: $typeString") + throw RapidsErrorUtils.illegalParquetTypeError(typeString) // When maxPrecision = -1, we skip precision range check, and always respect the precision // specified in field.getDecimalMetadata. This is useful when interpreting decimal types stored @@ -80,8 +81,7 @@ object ParquetSchemaClipShims { val scale = field.getDecimalMetadata.getScale if (!(maxPrecision == -1 || 1 <= precision && precision <= maxPrecision)) { - TrampolineUtil.throwAnalysisException( - s"Invalid decimal precision: $typeName " + + throw new RapidsAnalysisException(s"Invalid decimal precision: $typeName " + s"cannot store $precision digits (max $maxPrecision)") } @@ -121,7 +121,7 @@ object ParquetSchemaClipShims { case INT96 => if (!SQLConf.get.isParquetINT96AsTimestamp) { - TrampolineUtil.throwAnalysisException( + throw new RapidsAnalysisException( "INT96 is not supported unless it's interpreted as timestamp. " + s"Please try to set ${SQLConf.PARQUET_INT96_AS_TIMESTAMP.key} to true.") } diff --git a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/RaiseErrorShim.scala b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/RaiseErrorShim.scala new file mode 100644 index 00000000000..de433d5f270 --- /dev/null +++ b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/RaiseErrorShim.scala @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/*** spark-rapids-shim-json-lines +{"spark": "311"} +{"spark": "312"} +{"spark": "313"} +{"spark": "320"} +{"spark": "321"} +{"spark": "321cdh"} +{"spark": "322"} +{"spark": "323"} +{"spark": "324"} +{"spark": "330"} +{"spark": "330cdh"} +{"spark": "330db"} +{"spark": "331"} +{"spark": "332"} +{"spark": "332cdh"} +{"spark": "332db"} +{"spark": "333"} +{"spark": "334"} +{"spark": "340"} +{"spark": "341"} +{"spark": "341db"} +{"spark": "342"} +{"spark": "343"} +{"spark": "350"} +{"spark": "351"} +spark-rapids-shim-json-lines ***/ +package com.nvidia.spark.rapids.shims + +import com.nvidia.spark.rapids.{ExprRule, GpuOverrides} +import com.nvidia.spark.rapids.{ExprChecks, GpuExpression, TypeSig, UnaryExprMeta} + +import org.apache.spark.sql.catalyst.expressions.{Expression, RaiseError} +import org.apache.spark.sql.rapids.shims.GpuRaiseError + +object RaiseErrorShim { + val exprs: Map[Class[_ <: Expression], ExprRule[_ <: Expression]] = { + Seq(GpuOverrides.expr[RaiseError]( + "Throw an exception", + ExprChecks.unaryProject( + TypeSig.NULL, TypeSig.NULL, + TypeSig.STRING, TypeSig.STRING), + (a, conf, p, r) => new UnaryExprMeta[RaiseError](a, conf, p, r) { + override def convertToGpu(child: Expression): GpuExpression = GpuRaiseError(child) + })).map(r => (r.getClassFor.asSubclass(classOf[Expression]), r)).toMap + } +} diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/rapids/shims/GpuShuffleExchangeExec.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/rapids/shims/GpuShuffleExchangeExec.scala index d94c8e54683..2dcad0d4226 100644 --- a/sql-plugin/src/main/spark311/scala/org/apache/spark/rapids/shims/GpuShuffleExchangeExec.scala +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/rapids/shims/GpuShuffleExchangeExec.scala @@ -58,6 +58,7 @@ case class GpuShuffleExchangeExec( cpuOutputPartitioning: Partitioning) extends GpuShuffleExchangeExecBaseWithMetrics(gpuOutputPartitioning, child) with ShuffleExchangeLike { + def shuffleId: Int = shuffleDependencyColumnar.shuffleId override def otherCopyArgs: Seq[AnyRef] = cpuOutputPartitioning :: Nil diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/CommandUtilsShim.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/CommandUtilsShim.scala new file mode 100644 index 00000000000..1e1ac57aa60 --- /dev/null +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/CommandUtilsShim.scala @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/*** spark-rapids-shim-json-lines +{"spark": "311"} +{"spark": "312"} +{"spark": "313"} +{"spark": "320"} +{"spark": "321"} +{"spark": "321cdh"} +{"spark": "322"} +{"spark": "323"} +{"spark": "324"} +{"spark": "330"} +{"spark": "330cdh"} +{"spark": "330db"} +{"spark": "331"} +{"spark": "332"} +{"spark": "332cdh"} +{"spark": "332db"} +{"spark": "333"} +{"spark": "334"} +{"spark": "340"} +{"spark": "341"} +{"spark": "341db"} +{"spark": "342"} +{"spark": "343"} +{"spark": "350"} +{"spark": "351"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.hive.rapids.shims + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.execution.command.CommandUtils + +object CommandUtilsShim { + + // Shim for CommandUtils.uncacheTableOrView, whose signature changed in Apache Spark 4.0. + def uncacheTableOrView(sparkSession: SparkSession, tableId: TableIdentifier): Unit = { + CommandUtils.uncacheTableOrView(sparkSession, tableId.quotedString) + } + +} \ No newline at end of file diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/GpuInsertIntoHiveTable.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/GpuInsertIntoHiveTable.scala index 92fb72801c8..2ea0301fa2c 100644 --- a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/GpuInsertIntoHiveTable.scala +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/GpuInsertIntoHiveTable.scala @@ -45,7 +45,7 @@ import org.apache.hadoop.hive.ql.ErrorMsg import org.apache.hadoop.hive.ql.plan.TableDesc import org.apache.spark.SparkException -import org.apache.spark.sql.{AnalysisException, SparkSession} +import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, ExternalCatalog, ExternalCatalogUtils, ExternalCatalogWithListener} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan @@ -57,7 +57,8 @@ import org.apache.spark.sql.hive.HiveShim.{ShimFileSinkDesc => FileSinkDesc} import org.apache.spark.sql.hive.client.HiveClientImpl import org.apache.spark.sql.hive.client.hive._ import org.apache.spark.sql.hive.execution.InsertIntoHiveTable -import org.apache.spark.sql.hive.rapids.{GpuHiveTextFileFormat, GpuSaveAsHiveFile, RapidsHiveErrors} +import org.apache.spark.sql.hive.rapids.{GpuHiveFileFormat, GpuSaveAsHiveFile, RapidsHiveErrors} +import org.apache.spark.sql.rapids.shims.RapidsErrorUtils import org.apache.spark.sql.vectorized.ColumnarBatch final class GpuInsertIntoHiveTableMeta(cmd: InsertIntoHiveTable, @@ -69,16 +70,17 @@ final class GpuInsertIntoHiveTableMeta(cmd: InsertIntoHiveTable, private var fileFormat: Option[ColumnarFileFormat] = None override def tagSelfForGpuInternal(): Unit = { - // Only Hive delimited text writes are currently supported. - // Check whether that is the format currently in play. 
- fileFormat = GpuHiveTextFileFormat.tagGpuSupport(this) + fileFormat = GpuHiveFileFormat.tagGpuSupport(this) } override def convertToGpu(): GpuDataWritingCommand = { + val format = fileFormat.getOrElse( + throw new IllegalStateException("fileFormat missing, tagSelfForGpu not called?")) + GpuInsertIntoHiveTable( table = wrapped.table, partition = wrapped.partition, - fileFormat = this.fileFormat.get, + fileFormat = format, query = wrapped.query, overwrite = wrapped.overwrite, ifPartitionNotExists = wrapped.ifPartitionNotExists, @@ -137,7 +139,7 @@ case class GpuInsertIntoHiveTable( } // un-cache this table. - CommandUtils.uncacheTableOrView(sparkSession, table.identifier.quotedString) + CommandUtilsShim.uncacheTableOrView(sparkSession, table.identifier) sparkSession.sessionState.catalog.refreshTable(table.identifier) CommandUtils.updateTableStats(sparkSession, table) @@ -192,7 +194,7 @@ case class GpuInsertIntoHiveTable( // Report error if any static partition appears after a dynamic partition val isDynamic = partitionColumnNames.map(partitionSpec(_).isEmpty) if (isDynamic.init.zip(isDynamic.tail).contains((true, false))) { - throw new AnalysisException(ErrorMsg.PARTITION_DYN_STA_ORDER.getMsg) + throw RapidsErrorUtils.dynamicPartitionParentError } } @@ -326,8 +328,10 @@ case class GpuInsertIntoHiveTable( if (!fs.delete(path, true)) { throw RapidsHiveErrors.cannotRemovePartitionDirError(path) } - // Don't let Hive do overwrite operation since it is slower. - doHiveOverwrite = false + // Don't let Hive do overwrite operation since it is slower. But still give a + // chance to forcely override this for some customized cases when this + // operation is optimized. + doHiveOverwrite = hadoopConf.getBoolean("hive.movetask.enable.dir.move", false) } } } diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuArrowPythonRunner.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuArrowPythonRunner.scala index 761d84b4667..977c755712a 100644 --- a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuArrowPythonRunner.scala +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuArrowPythonRunner.scala @@ -49,7 +49,6 @@ import com.nvidia.spark.rapids.GpuSemaphore import org.apache.spark.{SparkEnv, TaskContext} import org.apache.spark.api.python.ChainedPythonFunctions -import org.apache.spark.sql.execution.python.PythonUDFRunner import org.apache.spark.sql.rapids.execution.python.{GpuArrowPythonWriter, GpuPythonRunnerCommon} import org.apache.spark.sql.rapids.shims.ArrowUtilsShim import org.apache.spark.sql.types.StructType @@ -60,7 +59,7 @@ import org.apache.spark.util.Utils * Similar to `PythonUDFRunner`, but exchange data with Python worker via Arrow stream. 
*/ class GpuArrowPythonRunner( - funcs: Seq[ChainedPythonFunctions], + funcs: Seq[(ChainedPythonFunctions, Long)], evalType: Int, argOffsets: Array[Array[Int]], pythonInSchema: StructType, @@ -69,8 +68,8 @@ class GpuArrowPythonRunner( maxBatchSize: Long, override val pythonOutSchema: StructType, jobArtifactUUID: Option[String] = None) - extends GpuBasePythonRunner[ColumnarBatch](funcs, evalType, argOffsets, jobArtifactUUID) - with GpuArrowPythonOutput with GpuPythonRunnerCommon { + extends GpuBasePythonRunner[ColumnarBatch](funcs.map(_._1), evalType, argOffsets, + jobArtifactUUID) with GpuArrowPythonOutput with GpuPythonRunnerCommon { protected override def newWriterThread( env: SparkEnv, @@ -82,7 +81,7 @@ class GpuArrowPythonRunner( val arrowWriter = new GpuArrowPythonWriter(pythonInSchema, maxBatchSize) { override protected def writeUDFs(dataOut: DataOutputStream): Unit = { - PythonUDFRunner.writeUDFs(dataOut, funcs, argOffsets) + WritePythonUDFUtils.writeUDFs(dataOut, funcs, argOffsets) } } val isInputNonEmpty = inputIterator.nonEmpty diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuCoGroupedArrowPythonRunner.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuCoGroupedArrowPythonRunner.scala index adb28725ba1..68112676a2b 100644 --- a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuCoGroupedArrowPythonRunner.scala +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuCoGroupedArrowPythonRunner.scala @@ -50,7 +50,6 @@ import com.nvidia.spark.rapids.GpuSemaphore import org.apache.spark.{SparkEnv, TaskContext} import org.apache.spark.api.python.{ChainedPythonFunctions, PythonRDD} -import org.apache.spark.sql.execution.python.PythonUDFRunner import org.apache.spark.sql.rapids.execution.python.{GpuArrowWriter, GpuPythonRunnerCommon} import org.apache.spark.sql.types.StructType import org.apache.spark.sql.vectorized.ColumnarBatch @@ -63,7 +62,7 @@ import org.apache.spark.util.Utils * and receive it back in JVM as batches of single DataFrame. 
*/ class GpuCoGroupedArrowPythonRunner( - funcs: Seq[ChainedPythonFunctions], + funcs: Seq[(ChainedPythonFunctions, Long)], evalType: Int, argOffsets: Array[Array[Int]], leftSchema: StructType, @@ -73,7 +72,7 @@ class GpuCoGroupedArrowPythonRunner( batchSize: Int, override val pythonOutSchema: StructType, jobArtifactUUID: Option[String] = None) - extends GpuBasePythonRunner[(ColumnarBatch, ColumnarBatch)](funcs, evalType, + extends GpuBasePythonRunner[(ColumnarBatch, ColumnarBatch)](funcs.map(_._1), evalType, argOffsets, jobArtifactUUID) with GpuArrowPythonOutput with GpuPythonRunnerCommon { protected override def newWriterThread( @@ -90,7 +89,7 @@ class GpuCoGroupedArrowPythonRunner( PythonRDD.writeUTF(k, dataOut) PythonRDD.writeUTF(v, dataOut) } - PythonUDFRunner.writeUDFs(dataOut, funcs, argOffsets) + WritePythonUDFUtils.writeUDFs(dataOut, funcs, argOffsets) } protected override def writeIteratorToStream(dataOut: DataOutputStream): Unit = { diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupedPythonRunnerFactory.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupedPythonRunnerFactory.scala index eba0286e181..9df93a9d11b 100644 --- a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupedPythonRunnerFactory.scala +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupedPythonRunnerFactory.scala @@ -48,7 +48,7 @@ import org.apache.spark.sql.vectorized.ColumnarBatch case class GpuGroupedPythonRunnerFactory( conf: org.apache.spark.sql.internal.SQLConf, - chainedFunc: Seq[ChainedPythonFunctions], + chainedFunc: Seq[(ChainedPythonFunctions, Long)], argOffsets: Array[Array[Int]], dedupAttrs: StructType, pythonOutputSchema: StructType, diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/WritePythonUDFUtils.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/WritePythonUDFUtils.scala new file mode 100644 index 00000000000..aacf972e7e0 --- /dev/null +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/execution/python/shims/WritePythonUDFUtils.scala @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/*** spark-rapids-shim-json-lines +{"spark": "311"} +{"spark": "312"} +{"spark": "313"} +{"spark": "320"} +{"spark": "321"} +{"spark": "321cdh"} +{"spark": "322"} +{"spark": "323"} +{"spark": "324"} +{"spark": "330"} +{"spark": "330cdh"} +{"spark": "330db"} +{"spark": "331"} +{"spark": "332"} +{"spark": "332cdh"} +{"spark": "332db"} +{"spark": "333"} +{"spark": "334"} +{"spark": "340"} +{"spark": "341"} +{"spark": "341db"} +{"spark": "342"} +{"spark": "343"} +{"spark": "350"} +{"spark": "351"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.rapids.execution.python.shims + +import java.io.DataOutputStream + +import org.apache.spark.api.python.ChainedPythonFunctions +import org.apache.spark.sql.execution.python.PythonUDFRunner + +object WritePythonUDFUtils { + def writeUDFs( + dataOut: DataOutputStream, + funcs: Seq[(ChainedPythonFunctions, Long)], + argOffsets: Array[Array[Int]], + profiler: Option[String] = None): Unit = { + PythonUDFRunner.writeUDFs(dataOut, funcs.map(_._1), argOffsets) + } +} diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala index f23229e0956..7fa269db71a 100644 --- a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.trees.Origin import org.apache.spark.sql.types.{DataType, Decimal, DecimalType} -object RapidsErrorUtils { +object RapidsErrorUtils extends RapidsQueryErrorUtils { def invalidArrayIndexError(index: Int, numElements: Int, isElementAtF: Boolean = false): ArrayIndexOutOfBoundsException = { // Follow the Spark string format before 3.3.0 diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/RapidsQueryErrorUtils.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/RapidsQueryErrorUtils.scala new file mode 100644 index 00000000000..266cb4ef54f --- /dev/null +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/RapidsQueryErrorUtils.scala @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/*** spark-rapids-shim-json-lines +{"spark": "311"} +{"spark": "312"} +{"spark": "313"} +spark-rapids-shim-json-lines ***/ + +package org.apache.spark.sql.rapids.shims + +import org.apache.hadoop.fs.Path +import org.apache.hadoop.hive.ql.ErrorMsg + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.rapids.execution.RapidsAnalysisException +import org.apache.spark.sql.types.StructType + +trait RapidsQueryErrorUtils { + + def outputPathAlreadyExistsError(qualifiedOutputPath: Path): Throwable = { + new AnalysisException(s"path $qualifiedOutputPath already exists.") + } + + def createTableAsSelectWithNonEmptyDirectoryError(tablePath: String, conf: String): Throwable = { + new AnalysisException(s"CREATE-TABLE-AS-SELECT cannot create table with location to a " + + s"non-empty directory $tablePath. To allow overwriting the existing non-empty directory, " + + s"set '$conf' to true.") + } + + def cannotResolveAttributeError(name: String, outputStr: String): Throwable = { + new AnalysisException(s"Unable to resolve $name given [$outputStr]") + } + + def partitionColumnNotSpecifiedError(format: String, partitionColumn: String): Throwable = { + new AnalysisException(s"Failed to resolve the schema for $format for the partition column: " + + s"$partitionColumn. It must be specified manually.") + } + + def dataSchemaNotSpecifiedError(format: String): Throwable = { + new AnalysisException(s"Unable to infer schema for $format. It must be specified manually.") + } + + def schemaNotSpecifiedForSchemaRelationProviderError(className: String): Throwable = { + new AnalysisException(s"A schema needs to be specified when using $className.") + } + + def userSpecifiedSchemaMismatchActualSchemaError( + schema: StructType, + actualSchema: StructType): Throwable = { + new AnalysisException("The user-specified schema doesn't match the actual schema: " + + s"user-specified: ${schema.toDDL}, actual: ${actualSchema.toDDL}. If " + + "you're using DataFrameReader.schema API or creating a table, please do not " + + "specify the schema. Or if you're scanning an existed table, please drop " + + "it and re-create it.") + } + + def dataSchemaNotSpecifiedError(format: String, fileCatalog: String): Throwable = { + new AnalysisException(s"Unable to infer schema for $format at $fileCatalog. " + + "It must be specified manually") + } + + def invalidDataSourceError(className: String): Throwable = { + new AnalysisException(s"$className is not a valid Spark SQL Data Source.") + } + + def orcNotUsedWithHiveEnabledError(): Throwable = { + new AnalysisException( + s"Hive built-in ORC data source must be used with Hive support enabled. " + + s"Please use the native ORC data source by setting 'spark.sql.orc.impl' to 'native'.") + } + + def failedToFindAvroDataSourceError(provider: String): Throwable = { + new AnalysisException( + s"Failed to find data source: $provider. Avro is built-in but external data " + + "source module since Spark 2.4. Please deploy the application as per " + + "the deployment section of \"Apache Avro Data Source Guide\".") + } + + def failedToFindKafkaDataSourceError(provider: String): Throwable = { + new AnalysisException( + s"Failed to find data source: $provider. 
Please deploy the application as " + + "per the deployment section of " + + "\"Structured Streaming + Kafka Integration Guide\".") + } + + def findMultipleDataSourceError(provider: String, sourceNames: Seq[String]): Throwable = { + new AnalysisException( + s"Multiple sources found for $provider " + + s"(${sourceNames.mkString(", ")}), please specify the fully qualified class name.") + } + + def dataPathNotExistError(path: String): Throwable = { + new AnalysisException(s"Path does not exist: $path") + } + + def dynamicPartitionParentError: Throwable = { + throw new RapidsAnalysisException(ErrorMsg.PARTITION_DYN_STA_ORDER.getMsg) + } + + def tableOrViewAlreadyExistsError(tableName: String): Throwable = { + new AnalysisException(s"Table $tableName already exists. You need to drop it first.") + } + + def parquetTypeUnsupportedYetError(parquetType: String): Throwable = { + new AnalysisException(s"Parquet type not yet supported: $parquetType.") + } + + def illegalParquetTypeError(parquetType: String): Throwable = { + new AnalysisException(s"Illegal Parquet type: $parquetType.") + } +} \ No newline at end of file diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/misc.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/misc.scala similarity index 75% rename from sql-plugin/src/main/scala/org/apache/spark/sql/rapids/misc.scala rename to sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/misc.scala index b32bdfa207c..1ab58ddcbb6 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/misc.scala +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/shims/misc.scala @@ -13,10 +13,36 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +/*** spark-rapids-shim-json-lines +{"spark": "311"} +{"spark": "312"} +{"spark": "313"} +{"spark": "320"} +{"spark": "321"} +{"spark": "321cdh"} +{"spark": "322"} +{"spark": "323"} +{"spark": "324"} +{"spark": "330"} +{"spark": "330cdh"} +{"spark": "330db"} +{"spark": "331"} +{"spark": "332"} +{"spark": "332cdh"} +{"spark": "332db"} +{"spark": "333"} +{"spark": "334"} +{"spark": "340"} +{"spark": "341"} +{"spark": "341db"} +{"spark": "342"} +{"spark": "343"} +{"spark": "350"} +{"spark": "351"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.rapids.shims -package org.apache.spark.sql.rapids - -import ai.rapids.cudf.{ColumnVector} +import ai.rapids.cudf.ColumnVector import com.nvidia.spark.rapids.{GpuColumnVector, GpuUnaryExpression} import com.nvidia.spark.rapids.Arm.withResource diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuOrcDataReader.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuOrcDataReader.scala index 8c82074b8f5..aec35945b4e 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuOrcDataReader.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuOrcDataReader.scala @@ -33,101 +33,17 @@ {"spark": "343"} {"spark": "350"} {"spark": "351"} -{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims -import java.io.EOFException -import java.nio.ByteBuffer -import java.nio.channels.SeekableByteChannel - -import ai.rapids.cudf.HostMemoryBuffer -import com.nvidia.spark.rapids.Arm.closeOnExcept import com.nvidia.spark.rapids.GpuMetric -import com.nvidia.spark.rapids.filecache.FileCache import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.hive.common.io.DiskRangeList -import org.apache.orc.OrcProto -import org.apache.orc.impl.{BufferChunk, BufferChunkList, DataReaderProperties, InStream, OrcCodecPool} +import org.apache.orc.impl.DataReaderProperties class GpuOrcDataReader( props: DataReaderProperties, conf: Configuration, - metrics: Map[String, GpuMetric]) extends GpuOrcDataReaderBase(props, conf, metrics) { - - private class BufferChunkLoader(useDirect: Boolean) extends BlockLoader { - override def loadRemoteBlocks( - baseOffset: Long, - first: DiskRangeList, - last: DiskRangeList, - data: ByteBuffer): DiskRangeList = { - var current = first - val offset = current.getOffset - while (current ne last.next) { - val buffer = if (current eq last) data else data.duplicate() - buffer.position((current.getOffset - offset).toInt) - buffer.limit((current.getEnd - offset).toInt) - current.asInstanceOf[BufferChunk].setChunk(buffer) - // see if the filecache wants any of this data - val cacheToken = FileCache.get.startDataRangeCache(filePathString, - baseOffset + current.getOffset, current.getLength, conf) - cacheToken.foreach { token => - val hmb = closeOnExcept(HostMemoryBuffer.allocate(current.getLength, false)) { hmb => - hmb.setBytes(0, buffer.array(), - buffer.arrayOffset() + buffer.position(), current.getLength) - hmb - } - token.complete(hmb) - } - current = current.next - } - current - } - - override def loadCachedBlock( - chunk: DiskRangeList, - channel: SeekableByteChannel): DiskRangeList = { - val buffer = if (useDirect) { - ByteBuffer.allocateDirect(chunk.getLength) - } else { - ByteBuffer.allocate(chunk.getLength) - } - while (buffer.remaining() > 0) { - if (channel.read(buffer) < 0) { - throw new EOFException(s"Unexpected EOF while reading cached block for $filePathString") - } - } - buffer.flip() - 
chunk.asInstanceOf[BufferChunk].setChunk(buffer) - chunk - } - } - - override protected def parseStripeFooter(buf: ByteBuffer, size: Int): OrcProto.StripeFooter = { - OrcProto.StripeFooter.parseFrom( - InStream.createCodedInputStream(InStream.create("footer", - new BufferChunk(buf, 0), 0, size, compression))) - } - - override def getCompressionOptions: InStream.StreamOptions = compression - - override def readFileData(chunks: BufferChunkList, forceDirect: Boolean): BufferChunkList = { - if (chunks != null) { - readDiskRanges(chunks.get, 0, new BufferChunkLoader(forceDirect)) - } - chunks - } - - override def close(): Unit = { - if (compression.getCodec != null) { - if (compression.getCodec != null) { - OrcCodecPool.returnCodec(compression.getCodec.getKind, compression.getCodec) - compression.withCodec(null) - } - } - super.close() - } -} + metrics: Map[String, GpuMetric]) extends GpuOrcDataReader320Plus(props, conf, metrics) object GpuOrcDataReader { // File cache is being used, so we want read ranges that can be cached separately diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuOrcDataReader320Plus.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuOrcDataReader320Plus.scala new file mode 100644 index 00000000000..e28f7001a2b --- /dev/null +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/GpuOrcDataReader320Plus.scala @@ -0,0 +1,130 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
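For context on the headers that appear throughout these files: each shim source carries a `spark-rapids-shim-json-lines` comment listing the Spark builds it participates in, and the build tooling appears to use these JSON lines to decide which Spark versions a file is compiled for. Several hunks in this patch only add or drop entries such as `{"spark": "400"}` from that list. The snippet below reproduces the header format as it appears in this patch (version values shown are just examples taken from nearby files).

```scala
/*** spark-rapids-shim-json-lines
{"spark": "350"}
{"spark": "351"}
{"spark": "400"}
spark-rapids-shim-json-lines ***/
```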
+ */ +/*** spark-rapids-shim-json-lines +{"spark": "320"} +{"spark": "321"} +{"spark": "322"} +{"spark": "323"} +{"spark": "324"} +{"spark": "330"} +{"spark": "330db"} +{"spark": "331"} +{"spark": "332"} +{"spark": "332db"} +{"spark": "333"} +{"spark": "334"} +{"spark": "340"} +{"spark": "341"} +{"spark": "341db"} +{"spark": "342"} +{"spark": "343"} +{"spark": "350"} +{"spark": "351"} +{"spark": "400"} +spark-rapids-shim-json-lines ***/ +package com.nvidia.spark.rapids.shims + +import java.io.EOFException +import java.nio.ByteBuffer +import java.nio.channels.SeekableByteChannel + +import ai.rapids.cudf.HostMemoryBuffer +import com.nvidia.spark.rapids.Arm.closeOnExcept +import com.nvidia.spark.rapids.GpuMetric +import com.nvidia.spark.rapids.filecache.FileCache +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.hive.common.io.DiskRangeList +import org.apache.orc.OrcProto +import org.apache.orc.impl.{BufferChunk, BufferChunkList, DataReaderProperties, InStream, OrcCodecPool} + +abstract class GpuOrcDataReader320Plus( + props: DataReaderProperties, + conf: Configuration, + metrics: Map[String, GpuMetric]) extends GpuOrcDataReaderBase(props, conf, metrics) { + + private class BufferChunkLoader(useDirect: Boolean) extends BlockLoader { + override def loadRemoteBlocks( + baseOffset: Long, + first: DiskRangeList, + last: DiskRangeList, + data: ByteBuffer): DiskRangeList = { + var current = first + val offset = current.getOffset + while (current ne last.next) { + val buffer = if (current eq last) data else data.duplicate() + buffer.position((current.getOffset - offset).toInt) + buffer.limit((current.getEnd - offset).toInt) + current.asInstanceOf[BufferChunk].setChunk(buffer) + // see if the filecache wants any of this data + val cacheToken = FileCache.get.startDataRangeCache(filePathString, + baseOffset + current.getOffset, current.getLength, conf) + cacheToken.foreach { token => + val hmb = closeOnExcept(HostMemoryBuffer.allocate(current.getLength, false)) { hmb => + hmb.setBytes(0, buffer.array(), + buffer.arrayOffset() + buffer.position(), current.getLength) + hmb + } + token.complete(hmb) + } + current = current.next + } + current + } + + override def loadCachedBlock( + chunk: DiskRangeList, + channel: SeekableByteChannel): DiskRangeList = { + val buffer = if (useDirect) { + ByteBuffer.allocateDirect(chunk.getLength) + } else { + ByteBuffer.allocate(chunk.getLength) + } + while (buffer.remaining() > 0) { + if (channel.read(buffer) < 0) { + throw new EOFException(s"Unexpected EOF while reading cached block for $filePathString") + } + } + buffer.flip() + chunk.asInstanceOf[BufferChunk].setChunk(buffer) + chunk + } + } + + override protected def parseStripeFooter(buf: ByteBuffer, size: Int): OrcProto.StripeFooter = { + OrcProto.StripeFooter.parseFrom( + InStream.createCodedInputStream(InStream.create("footer", + new BufferChunk(buf, 0), 0, size, compression))) + } + + override def getCompressionOptions: InStream.StreamOptions = compression + + override def readFileData(chunks: BufferChunkList, forceDirect: Boolean): BufferChunkList = { + if (chunks != null) { + readDiskRanges(chunks.get, 0, new BufferChunkLoader(forceDirect)) + } + chunks + } + + override def close(): Unit = { + if (compression.getCodec != null) { + if (compression.getCodec != null) { + OrcCodecPool.returnCodec(compression.getCodec.getKind, compression.getCodec) + compression.withCodec(null) + } + } + super.close() + } +} \ No newline at end of file diff --git 
a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/ParquetSchemaClipShims.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/ParquetSchemaClipShims.scala index c3152a8a235..bba205f267f 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/ParquetSchemaClipShims.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/ParquetSchemaClipShims.scala @@ -29,7 +29,8 @@ import org.apache.parquet.schema.LogicalTypeAnnotation._ import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName._ import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.rapids.execution.TrampolineUtil +import org.apache.spark.sql.rapids.execution.RapidsAnalysisException +import org.apache.spark.sql.rapids.shims.RapidsErrorUtils import org.apache.spark.sql.types._ object ParquetSchemaClipShims { @@ -67,10 +68,10 @@ object ParquetSchemaClipShims { if (typeAnnotation == null) s"$typeName" else s"$typeName ($typeAnnotation)" def typeNotImplemented() = - TrampolineUtil.throwAnalysisException(s"Parquet type not yet supported: $typeString") + throw RapidsErrorUtils.parquetTypeUnsupportedYetError(typeString) def illegalType() = - TrampolineUtil.throwAnalysisException(s"Illegal Parquet type: $typeString") + throw RapidsErrorUtils.illegalParquetTypeError(typeString) // When maxPrecision = -1, we skip precision range check, and always respect the precision // specified in field.getDecimalMetadata. This is useful when interpreting decimal types stored @@ -82,7 +83,7 @@ object ParquetSchemaClipShims { val scale = decimalLogicalTypeAnnotation.getScale if (!(maxPrecision == -1 || 1 <= precision && precision <= maxPrecision)) { - TrampolineUtil.throwAnalysisException( + throw new RapidsAnalysisException( s"Invalid decimal precision: $typeName " + s"cannot store $precision digits (max $maxPrecision)") } @@ -143,14 +144,14 @@ object ParquetSchemaClipShims { TimestampType case timestamp: TimestampLogicalTypeAnnotation if timestamp.getUnit == TimeUnit.NANOS && ParquetLegacyNanoAsLongShims.legacyParquetNanosAsLong => - TrampolineUtil.throwAnalysisException( + throw new RapidsAnalysisException( "GPU does not support spark.sql.legacy.parquet.nanosAsLong") case _ => illegalType() } case INT96 => if (!SQLConf.get.isParquetINT96AsTimestamp) { - TrampolineUtil.throwAnalysisException( + throw new RapidsAnalysisException( "INT96 is not supported unless it's interpreted as timestamp. " + s"Please try to set ${SQLConf.PARQUET_INT96_AS_TIMESTAMP.key} to true.") } diff --git a/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala b/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala index b301397255a..68a6ce30569 100644 --- a/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala +++ b/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,7 +29,7 @@ import org.apache.spark.sql.catalyst.trees.Origin import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.types.{DataType, Decimal, DecimalType} -object RapidsErrorUtils { +object RapidsErrorUtils extends RapidsQueryErrorUtils { def invalidArrayIndexError(index: Int, numElements: Int, isElementAtF: Boolean = false): ArrayIndexOutOfBoundsException = { // Follow the Spark string format before 3.3.0 diff --git a/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/RapidsQueryErrorUtils.scala b/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/RapidsQueryErrorUtils.scala new file mode 100644 index 00000000000..dbc4145ee54 --- /dev/null +++ b/sql-plugin/src/main/spark320/scala/org/apache/spark/sql/rapids/shims/RapidsQueryErrorUtils.scala @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "320"} +{"spark": "321"} +{"spark": "321cdh"} +{"spark": "322"} +{"spark": "323"} +{"spark": "324"} +{"spark": "330"} +{"spark": "330cdh"} +{"spark": "330db"} +{"spark": "331"} +{"spark": "332"} +{"spark": "332cdh"} +{"spark": "332db"} +{"spark": "333"} +{"spark": "334"} +{"spark": "340"} +{"spark": "341"} +{"spark": "341db"} +{"spark": "342"} +{"spark": "343"} +{"spark": "350"} +{"spark": "351"} +{"spark": "400"} +spark-rapids-shim-json-lines ***/ + +package org.apache.spark.sql.rapids.shims + +import org.apache.hadoop.fs.Path +import org.apache.hadoop.hive.ql.ErrorMsg + +import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.rapids.execution.RapidsAnalysisException +import org.apache.spark.sql.types.StructType + +trait RapidsQueryErrorUtils { + + def outputPathAlreadyExistsError(qualifiedOutputPath: Path): Throwable = { + QueryCompilationErrors.outputPathAlreadyExistsError(qualifiedOutputPath) + } + + def createTableAsSelectWithNonEmptyDirectoryError(tablePath: String, conf: String): Throwable = { + QueryCompilationErrors.createTableAsSelectWithNonEmptyDirectoryError(tablePath) + } + + def cannotResolveAttributeError(name: String, outputStr: String): Throwable = { + QueryCompilationErrors.cannotResolveAttributeError(name, outputStr) + } + + def partitionColumnNotSpecifiedError(format: String, partitionColumn: String): Throwable = { + QueryCompilationErrors.partitionColumnNotSpecifiedError(format, partitionColumn) + } + + def dataSchemaNotSpecifiedError(format: String): Throwable = { + QueryCompilationErrors.dataSchemaNotSpecifiedError(format) + } + + def schemaNotSpecifiedForSchemaRelationProviderError(className: String): Throwable = { + QueryCompilationErrors.schemaNotSpecifiedForSchemaRelationProviderError(className) + } + + def userSpecifiedSchemaMismatchActualSchemaError( + schema: StructType, + actualSchema: StructType): Throwable = { + QueryCompilationErrors.userSpecifiedSchemaMismatchActualSchemaError(schema, actualSchema) + } + + def 
dataSchemaNotSpecifiedError(format: String, fileCatalog: String): Throwable = { + QueryCompilationErrors.dataSchemaNotSpecifiedError(format, fileCatalog) + } + + def invalidDataSourceError(className: String): Throwable = { + QueryCompilationErrors.invalidDataSourceError(className) + } + + def orcNotUsedWithHiveEnabledError(): Throwable = { + QueryCompilationErrors.orcNotUsedWithHiveEnabledError() + } + + def failedToFindAvroDataSourceError(provider: String): Throwable = { + QueryCompilationErrors.failedToFindAvroDataSourceError(provider) + } + + def failedToFindKafkaDataSourceError(provider: String): Throwable = { + QueryCompilationErrors.failedToFindKafkaDataSourceError(provider) + } + + def findMultipleDataSourceError(provider: String, sourceNames: Seq[String]): Throwable = { + QueryCompilationErrors.findMultipleDataSourceError(provider, sourceNames) + } + + def dataPathNotExistError(path: String): Throwable = { + QueryCompilationErrors.dataPathNotExistError(path) + } + + def tableOrViewAlreadyExistsError(tableName: String): Throwable = { + QueryCompilationErrors.tableOrViewAlreadyExistsError(tableName) + } + + def parquetTypeUnsupportedYetError(parquetType: String): Throwable = { + QueryCompilationErrors.parquetTypeUnsupportedYetError(parquetType) + } + + def illegalParquetTypeError(parquetType: String): Throwable = { + QueryCompilationErrors.illegalParquetTypeError(parquetType) + } + + def dynamicPartitionParentError: Throwable = { + throw new RapidsAnalysisException(ErrorMsg.PARTITION_DYN_STA_ORDER.getMsg) + } +} \ No newline at end of file diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/ParquetSchemaClipShims.scala b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/ParquetSchemaClipShims.scala index 56708017a23..8c395274e07 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/ParquetSchemaClipShims.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/ParquetSchemaClipShims.scala @@ -44,7 +44,8 @@ import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName._ import org.apache.spark.sql.execution.datasources.parquet.ParquetReadSupport.containsFieldIds import org.apache.spark.sql.execution.datasources.parquet.ParquetUtils import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.rapids.execution.TrampolineUtil +import org.apache.spark.sql.rapids.execution.RapidsAnalysisException +import org.apache.spark.sql.rapids.shims.RapidsErrorUtils import org.apache.spark.sql.types._ object ParquetSchemaClipShims { @@ -109,10 +110,11 @@ object ParquetSchemaClipShims { if (typeAnnotation == null) s"$typeName" else s"$typeName ($typeAnnotation)" def typeNotImplemented() = - TrampolineUtil.throwAnalysisException(s"Parquet type not yet supported: $typeString") + throw RapidsErrorUtils.parquetTypeUnsupportedYetError(typeString) def illegalType() = - TrampolineUtil.throwAnalysisException(s"Illegal Parquet type: $parquetType") + throw RapidsErrorUtils.illegalParquetTypeError(typeString) + // When maxPrecision = -1, we skip precision range check, and always respect the precision // specified in field.getDecimalMetadata. 
This is useful when interpreting decimal types stored @@ -124,7 +126,7 @@ object ParquetSchemaClipShims { val scale = decimalLogicalTypeAnnotation.getScale if (!(maxPrecision == -1 || 1 <= precision && precision <= maxPrecision)) { - TrampolineUtil.throwAnalysisException(s"Invalid decimal precision: $typeName " + + throw new RapidsAnalysisException(s"Invalid decimal precision: $typeName " + s"cannot store $precision digits (max $maxPrecision)") } @@ -183,14 +185,14 @@ object ParquetSchemaClipShims { ParquetTimestampAnnotationShims.timestampTypeForMillisOrMicros(timestamp) case timestamp: TimestampLogicalTypeAnnotation if timestamp.getUnit == TimeUnit.NANOS && ParquetLegacyNanoAsLongShims.legacyParquetNanosAsLong => - TrampolineUtil.throwAnalysisException( + throw new RapidsAnalysisException( "GPU does not support spark.sql.legacy.parquet.nanosAsLong") case _ => illegalType() } case INT96 => if (!SQLConf.get.isParquetINT96AsTimestamp) { - TrampolineUtil.throwAnalysisException( + throw new RapidsAnalysisException( "INT96 is not supported unless it's interpreted as timestamp. " + s"Please try to set ${SQLConf.PARQUET_INT96_AS_TIMESTAMP.key} to true.") } diff --git a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/GpuPythonMapInArrowExec.scala b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/GpuPythonMapInArrowExec.scala index 768261cbc89..5118c21ff2e 100644 --- a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/GpuPythonMapInArrowExec.scala +++ b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/GpuPythonMapInArrowExec.scala @@ -31,7 +31,6 @@ {"spark": "343"} {"spark": "350"} {"spark": "351"} -{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims diff --git a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala index bb28c370749..e5cdcd43568 100644 --- a/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala +++ b/sql-plugin/src/main/spark330/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
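The `ParquetSchemaClipShims` hunks above replace `TrampolineUtil.throwAnalysisException(...)` (a helper that throws internally) with either `throw RapidsErrorUtils.<error>(...)` or `throw new RapidsAnalysisException(...)`. The sketch below is a self-contained illustration of that control-flow shift only; `ThrowHelperDemo`, `FactoryDemo`, and `ParquetClipDemo` are hypothetical stand-ins, and plain JDK exceptions are used so the example compiles without Spark on the classpath.

```scala
// Old style: the helper itself throws, so nothing useful is returned to the caller.
object ThrowHelperDemo {
  def throwAnalysisExceptionDemo(msg: String): Unit = throw new IllegalArgumentException(msg)
}

// New style: the factory returns a Throwable and the call site throws it explicitly,
// which keeps `throw` visible where it happens and lets each shim choose the exception type.
object FactoryDemo {
  def illegalParquetTypeErrorDemo(parquetType: String): Throwable =
    new IllegalArgumentException(s"Illegal Parquet type: $parquetType.")
}

object ParquetClipDemo {
  def illegalTypeDemo(typeString: String): Nothing =
    throw FactoryDemo.illegalParquetTypeErrorDemo(typeString)
}
```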
@@ -31,7 +31,7 @@ import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{DataType, Decimal, DecimalType} -object RapidsErrorUtils extends RapidsErrorUtilsFor330plus { +object RapidsErrorUtils extends RapidsErrorUtilsFor330plus with RapidsQueryErrorUtils { def mapKeyNotExistError( key: String, diff --git a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupUDFArrowPythonRunner.scala b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupUDFArrowPythonRunner.scala index cb8eef809f3..a6338e7adc5 100644 --- a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupUDFArrowPythonRunner.scala +++ b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupUDFArrowPythonRunner.scala @@ -30,7 +30,6 @@ import com.nvidia.spark.rapids.GpuSemaphore import org.apache.spark.{SparkEnv, TaskContext} import org.apache.spark.api.python._ -import org.apache.spark.sql.execution.python.PythonUDFRunner import org.apache.spark.sql.rapids.execution.python.{GpuArrowPythonWriter, GpuPythonRunnerCommon} import org.apache.spark.sql.types.StructType import org.apache.spark.sql.vectorized.ColumnarBatch @@ -50,7 +49,7 @@ import org.apache.spark.util.Utils * more data being sent. */ class GpuGroupUDFArrowPythonRunner( - funcs: Seq[ChainedPythonFunctions], + funcs: Seq[(ChainedPythonFunctions, Long)], evalType: Int, argOffsets: Array[Array[Int]], pythonInSchema: StructType, @@ -59,8 +58,8 @@ class GpuGroupUDFArrowPythonRunner( maxBatchSize: Long, override val pythonOutSchema: StructType, jobArtifactUUID: Option[String] = None) - extends GpuBasePythonRunner[ColumnarBatch](funcs, evalType, argOffsets, jobArtifactUUID) - with GpuArrowPythonOutput with GpuPythonRunnerCommon { + extends GpuBasePythonRunner[ColumnarBatch](funcs.map(_._1), evalType, argOffsets, + jobArtifactUUID) with GpuArrowPythonOutput with GpuPythonRunnerCommon { protected override def newWriterThread( env: SparkEnv, @@ -72,7 +71,7 @@ class GpuGroupUDFArrowPythonRunner( val arrowWriter = new GpuArrowPythonWriter(pythonInSchema, maxBatchSize) { override protected def writeUDFs(dataOut: DataOutputStream): Unit = { - PythonUDFRunner.writeUDFs(dataOut, funcs, argOffsets) + WritePythonUDFUtils.writeUDFs(dataOut, funcs, argOffsets) } } diff --git a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupedPythonRunnerFactory.scala b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupedPythonRunnerFactory.scala index 451de0a2527..313ea6c20a2 100644 --- a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupedPythonRunnerFactory.scala +++ b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupedPythonRunnerFactory.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.vectorized.ColumnarBatch case class GpuGroupedPythonRunnerFactory( conf: org.apache.spark.sql.internal.SQLConf, - chainedFunc: Seq[ChainedPythonFunctions], + chainedFunc: Seq[(ChainedPythonFunctions, Long)], argOffsets: Array[Array[Int]], dedupAttrs: StructType, pythonOutputSchema: StructType, diff --git a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala 
index 1012b28d8b7..7e58a54c921 100644 --- a/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala +++ b/sql-plugin/src/main/spark330db/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,7 +22,7 @@ package org.apache.spark.sql.rapids.shims import org.apache.spark.sql.errors.QueryExecutionErrors -object RapidsErrorUtils extends RapidsErrorUtilsBase { +object RapidsErrorUtils extends RapidsErrorUtilsBase with RapidsQueryErrorUtils { def sqlArrayIndexNotStartAtOneError(): RuntimeException = { QueryExecutionErrors.elementAtByIndexZeroError(context = null) } diff --git a/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/GpuInsertIntoHiveTable.scala b/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/GpuInsertIntoHiveTable.scala index 9105ab50e1e..42fd5941025 100644 --- a/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/GpuInsertIntoHiveTable.scala +++ b/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/GpuInsertIntoHiveTable.scala @@ -37,7 +37,7 @@ import org.apache.hadoop.hive.ql.ErrorMsg import org.apache.hadoop.hive.ql.plan.TableDesc import org.apache.spark.SparkException -import org.apache.spark.sql.{AnalysisException, SparkSession} +import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, ExternalCatalog, ExternalCatalogUtils, ExternalCatalogWithListener} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan @@ -47,7 +47,8 @@ import org.apache.spark.sql.execution.command.CommandUtils import org.apache.spark.sql.hive.HiveExternalCatalog import org.apache.spark.sql.hive.client.HiveClientImpl import org.apache.spark.sql.hive.execution.InsertIntoHiveTable -import org.apache.spark.sql.hive.rapids.{GpuHiveTextFileFormat, GpuSaveAsHiveFile, RapidsHiveErrors} +import org.apache.spark.sql.hive.rapids.{GpuHiveFileFormat, GpuSaveAsHiveFile, RapidsHiveErrors} +import org.apache.spark.sql.rapids.shims.RapidsErrorUtils import org.apache.spark.sql.vectorized.ColumnarBatch final class GpuInsertIntoHiveTableMeta(cmd: InsertIntoHiveTable, @@ -59,16 +60,17 @@ final class GpuInsertIntoHiveTableMeta(cmd: InsertIntoHiveTable, private var fileFormat: Option[ColumnarFileFormat] = None override def tagSelfForGpuInternal(): Unit = { - // Only Hive delimited text writes are currently supported. - // Check whether that is the format currently in play. - fileFormat = GpuHiveTextFileFormat.tagGpuSupport(this) + fileFormat = GpuHiveFileFormat.tagGpuSupport(this) } override def convertToGpu(): GpuDataWritingCommand = { + val format = fileFormat.getOrElse( + throw new IllegalStateException("fileFormat missing, tagSelfForGpu not called?")) + GpuInsertIntoHiveTable( table = wrapped.table, partition = wrapped.partition, - fileFormat = this.fileFormat.get, + fileFormat = format, query = wrapped.query, overwrite = wrapped.overwrite, ifPartitionNotExists = wrapped.ifPartitionNotExists, @@ -127,7 +129,7 @@ case class GpuInsertIntoHiveTable( } // un-cache this table. 
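One detail worth noting in the `GpuInsertIntoHiveTableMeta` hunk above: `convertToGpu` no longer calls `.get` on the `fileFormat` option but resolves it with `getOrElse` and a descriptive `IllegalStateException`. A minimal sketch of that option-handling shape follows; `MetaDemo` and its members are hypothetical, not the real plugin types.

```scala
// Hypothetical sketch: fail fast with a descriptive message when the tagging step
// did not populate the option, instead of surfacing a bare NoSuchElementException.
final case class MetaDemo(fileFormat: Option[String]) {
  def convertDemo(): String = {
    val format = fileFormat.getOrElse(
      throw new IllegalStateException("fileFormat missing, tagSelfForGpu not called?"))
    s"converted with $format"
  }
}
```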
- CommandUtils.uncacheTableOrView(sparkSession, table.identifier.quotedString) + CommandUtilsShim.uncacheTableOrView(sparkSession, table.identifier) sparkSession.sessionState.catalog.refreshTable(table.identifier) CommandUtils.updateTableStats(sparkSession, table) @@ -181,7 +183,7 @@ case class GpuInsertIntoHiveTable( // Report error if any static partition appears after a dynamic partition val isDynamic = partitionColumnNames.map(partitionSpec(_).isEmpty) if (isDynamic.init.zip(isDynamic.tail).contains((true, false))) { - throw new AnalysisException(ErrorMsg.PARTITION_DYN_STA_ORDER.getMsg) + throw RapidsErrorUtils.dynamicPartitionParentError } } @@ -315,8 +317,10 @@ case class GpuInsertIntoHiveTable( if (!fs.delete(path, true)) { throw RapidsHiveErrors.cannotRemovePartitionDirError(path) } - // Don't let Hive do overwrite operation since it is slower. - doHiveOverwrite = false + // Don't let Hive do overwrite operation since it is slower. But still give a + // chance to forcely override this for some customized cases when this + // operation is optimized. + doHiveOverwrite = hadoopConf.getBoolean("hive.movetask.enable.dir.move", false) } } } diff --git a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/GpuFileFormatWriter.scala b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/GpuFileFormatWriter.scala index 78daa0bf6f1..e7b3561f5fd 100644 --- a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/GpuFileFormatWriter.scala +++ b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/GpuFileFormatWriter.scala @@ -42,7 +42,7 @@ import org.apache.spark.{SparkException, TaskContext} import org.apache.spark.internal.Logging import org.apache.spark.internal.io.{FileCommitProtocol, SparkHadoopWriterUtils} import org.apache.spark.shuffle.FetchFailedException -import org.apache.spark.sql.{AnalysisException, SparkSession} +import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.catalyst.expressions.{Ascending, Attribute, AttributeSet, Expression, SortOrder} @@ -51,6 +51,7 @@ import org.apache.spark.sql.connector.write.WriterCommitMessage import org.apache.spark.sql.execution.{SparkPlan, SQLExecution} import org.apache.spark.sql.execution.datasources.{GpuWriteFiles, GpuWriteFilesExec, GpuWriteFilesSpec, WriteTaskResult, WriteTaskStats} import org.apache.spark.sql.execution.datasources.FileFormatWriter.OutputSpec +import org.apache.spark.sql.rapids.execution.RapidsAnalysisException import org.apache.spark.sql.types.StructType import org.apache.spark.sql.vectorized.ColumnarBatch import org.apache.spark.util.{SerializableConfiguration, Utils} @@ -61,7 +62,7 @@ object GpuFileFormatWriter extends Logging { private def verifySchema(format: ColumnarFileFormat, schema: StructType): Unit = { schema.foreach { field => if (!format.supportDataType(field.dataType)) { - throw new AnalysisException( + throw new RapidsAnalysisException( s"$format data source does not support ${field.dataType.catalogString} data type.") } } diff --git a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/GpuCreateDataSourceTableAsSelectCommandShims.scala b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/GpuCreateDataSourceTableAsSelectCommandShims.scala index 9e36cf41fad..6308f24c552 100644 --- a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/GpuCreateDataSourceTableAsSelectCommandShims.scala +++ 
b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/shims/GpuCreateDataSourceTableAsSelectCommandShims.scala @@ -64,7 +64,7 @@ case class GpuCreateDataSourceTableAsSelectCommand( s"Expect the table $tableName has been dropped when the save mode is Overwrite") if (mode == SaveMode.ErrorIfExists) { - throw new AnalysisException(s"Table $tableName already exists. You need to drop it first.") + throw RapidsErrorUtils.tableOrViewAlreadyExistsError(tableName) } if (mode == SaveMode.Ignore) { // Since the table already exists and the save mode is Ignore, we will just return. diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/GpuBatchScanExec.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/GpuBatchScanExec.scala index 39f42d8b833..5fb252524fd 100644 --- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/GpuBatchScanExec.scala +++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/GpuBatchScanExec.scala @@ -22,7 +22,6 @@ {"spark": "343"} {"spark": "350"} {"spark": "351"} -{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/PartitionedFileUtilsShim.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/PartitionedFileUtilsShim.scala index ca2fa215892..62fe32ae8db 100644 --- a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/PartitionedFileUtilsShim.scala +++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/PartitionedFileUtilsShim.scala @@ -19,27 +19,7 @@ {"spark": "341"} {"spark": "342"} {"spark": "343"} -{"spark": "350"} -{"spark": "351"} -{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims -import org.apache.spark.paths.SparkPath -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.execution.datasources.PartitionedFile - -object PartitionedFileUtilsShim { - // Wrapper for case class constructor so Java code can access - // the default values across Spark versions. - def newPartitionedFile( - partitionValues: InternalRow, - filePath: String, - start: Long, - length: Long): PartitionedFile = PartitionedFile(partitionValues, - SparkPath.fromPathString(filePath), start, length) - - def withNewLocations(pf: PartitionedFile, locations: Seq[String]): PartitionedFile = { - pf.copy(locations = locations.toArray) - } -} +object PartitionedFileUtilsShim extends PartitionedFileUtilsShimBase diff --git a/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/PartitionedFileUtilsShimBase.scala b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/PartitionedFileUtilsShimBase.scala new file mode 100644 index 00000000000..a94c76dc083 --- /dev/null +++ b/sql-plugin/src/main/spark340/scala/com/nvidia/spark/rapids/shims/PartitionedFileUtilsShimBase.scala @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "340"} +{"spark": "341"} +{"spark": "342"} +{"spark": "343"} +{"spark": "350"} +{"spark": "351"} +{"spark": "400"} +spark-rapids-shim-json-lines ***/ +package com.nvidia.spark.rapids.shims + +import org.apache.spark.paths.SparkPath +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.execution.datasources.PartitionedFile + +trait PartitionedFileUtilsShimBase { + + // Wrapper for case class constructor so Java code can access + // the default values across Spark versions. + def newPartitionedFile(partitionValues: InternalRow, + filePath: String, + start: Long, + length: Long): PartitionedFile = PartitionedFile(partitionValues, + SparkPath.fromPathString(filePath), start, length) + + def withNewLocations(pf: PartitionedFile, locations: Seq[String]): PartitionedFile = { + pf.copy(locations = locations.toArray) + } +} diff --git a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala index 8ee0485ab36..e6f8886f19c 100644 --- a/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala +++ b/sql-plugin/src/main/spark340/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{DataType, Decimal, DecimalType} -object RapidsErrorUtils extends RapidsErrorUtilsFor330plus { +object RapidsErrorUtils extends RapidsErrorUtilsFor330plus with RapidsQueryErrorUtils { def mapKeyNotExistError( key: String, diff --git a/sql-plugin/src/main/spark341db/scala/com/nvidia/spark/rapids/shims/PartitionedFileUtilsShim.scala b/sql-plugin/src/main/spark341db/scala/com/nvidia/spark/rapids/shims/PartitionedFileUtilsShim.scala index 249502f1b49..0f1bdafde7a 100644 --- a/sql-plugin/src/main/spark341db/scala/com/nvidia/spark/rapids/shims/PartitionedFileUtilsShim.scala +++ b/sql-plugin/src/main/spark341db/scala/com/nvidia/spark/rapids/shims/PartitionedFileUtilsShim.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,8 +20,10 @@ spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims import org.apache.spark.paths.SparkPath +import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.execution.datasources.PartitionedFile +import org.apache.spark.sql.execution.PartitionedFileUtil +import org.apache.spark.sql.execution.datasources.{FileStatusWithMetadata, PartitionedFile} object PartitionedFileUtilsShim { // Wrapper for case class constructor so Java code can access @@ -37,4 +39,14 @@ object PartitionedFileUtilsShim { def withNewLocations(pf: PartitionedFile, locations: Seq[String]): PartitionedFile = { pf.copy(locations = locations) } + + // In Spark 4.0, PartitionedFileUtil.splitFiles lost its `sparkSession` parameter. + // This pre-Spark-4.0 shim keeps the `sparkSession` parameter. 
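The `PartitionedFileUtilsShim` changes in this patch push shared behaviour into a `PartitionedFileUtilsShimBase` trait and route callers (see the `SplitFiles` hunk further below) through `PartitionedFileUtilsShim.splitFiles` rather than Spark's `PartitionedFileUtil.splitFiles`, since the comment above notes the `sparkSession` parameter goes away in Spark 4.0. The following self-contained sketch mirrors that facade pattern with hypothetical names; it is not the real shim API.

```scala
// Shared behaviour lives in a base trait; each Spark build supplies one facade object.
trait FileShimBaseDemo {
  def newPartitionedFileDemo(path: String, start: Long, length: Long): String =
    s"$path@$start+$length"
}

// A pre-4.0-style facade that still accepts the extra "session" argument. Callers always go
// through the facade, so only this object changes when the upstream signature changes.
object FileShimDemo extends FileShimBaseDemo {
  def splitFilesDemo(session: String, path: String, maxSplitBytes: Long): Seq[String] =
    Seq(newPartitionedFileDemo(path, 0L, maxSplitBytes)) // placeholder single split
}
```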
+ def splitFiles(sparkSession: SparkSession, + file: FileStatusWithMetadata, + isSplitable: Boolean, + maxSplitBytes: Long, + partitionValues: InternalRow): Seq[PartitionedFile] = { + PartitionedFileUtil.splitFiles(sparkSession, file, isSplitable, maxSplitBytes, partitionValues) + } } diff --git a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/execution/rapids/shims/SplitFiles.scala b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/execution/rapids/shims/SplitFiles.scala index 3b94d5a5201..1934cb6af9f 100644 --- a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/execution/rapids/shims/SplitFiles.scala +++ b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/execution/rapids/shims/SplitFiles.scala @@ -23,12 +23,12 @@ spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.execution.rapids.shims +import com.nvidia.spark.rapids.shims.PartitionedFileUtilsShim import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.io.compress.{CompressionCodecFactory, SplittableCompressionCodec} import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.execution.PartitionedFileUtil import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, PartitionDirectory, PartitionedFile} trait SplitFiles { @@ -49,7 +49,7 @@ trait SplitFiles { selectedPartitions.flatMap { partition => partition.files.flatMap { f => - PartitionedFileUtil.splitFiles( + PartitionedFileUtilsShim.splitFiles( sparkSession, f, isSplitable = canBeSplit(f.getPath, hadoopConf), @@ -71,7 +71,7 @@ trait SplitFiles { val filePath = file.getPath val isSplitable = relation.fileFormat.isSplitable( relation.sparkSession, relation.options, filePath) - PartitionedFileUtil.splitFiles( + PartitionedFileUtilsShim.splitFiles( sparkSession = relation.sparkSession, file = file, isSplitable = isSplitable, diff --git a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuArrowPythonRunner.scala b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuArrowPythonRunner.scala index ac58baa2eb7..50c5e280e9c 100644 --- a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuArrowPythonRunner.scala +++ b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuArrowPythonRunner.scala @@ -25,7 +25,6 @@ import com.nvidia.spark.rapids.GpuSemaphore import org.apache.spark.{SparkEnv, TaskContext} import org.apache.spark.api.python._ -import org.apache.spark.sql.execution.python.PythonUDFRunner import org.apache.spark.sql.rapids.execution.python.{GpuArrowPythonWriter, GpuPythonRunnerCommon} import org.apache.spark.sql.rapids.shims.ArrowUtilsShim import org.apache.spark.sql.types.StructType @@ -35,7 +34,7 @@ import org.apache.spark.sql.vectorized.ColumnarBatch * Similar to `PythonUDFRunner`, but exchange data with Python worker via Arrow stream. 
*/ class GpuArrowPythonRunner( - funcs: Seq[ChainedPythonFunctions], + funcs: Seq[(ChainedPythonFunctions, Long)], evalType: Int, argOffsets: Array[Array[Int]], pythonInSchema: StructType, @@ -44,8 +43,8 @@ class GpuArrowPythonRunner( maxBatchSize: Long, override val pythonOutSchema: StructType, jobArtifactUUID: Option[String] = None) - extends GpuBasePythonRunner[ColumnarBatch](funcs, evalType, argOffsets, jobArtifactUUID) - with GpuArrowPythonOutput with GpuPythonRunnerCommon { + extends GpuBasePythonRunner[ColumnarBatch](funcs.map(_._1), evalType, argOffsets, + jobArtifactUUID) with GpuArrowPythonOutput with GpuPythonRunnerCommon { protected override def newWriter( env: SparkEnv, @@ -57,7 +56,7 @@ class GpuArrowPythonRunner( val arrowWriter = new GpuArrowPythonWriter(pythonInSchema, maxBatchSize) { override protected def writeUDFs(dataOut: DataOutputStream): Unit = { - PythonUDFRunner.writeUDFs(dataOut, funcs, argOffsets) + WritePythonUDFUtils.writeUDFs(dataOut, funcs, argOffsets) } } val isInputNonEmpty = inputIterator.nonEmpty diff --git a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuCoGroupedArrowPythonRunner.scala b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuCoGroupedArrowPythonRunner.scala index aad1eb52c02..0317a89009e 100644 --- a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuCoGroupedArrowPythonRunner.scala +++ b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuCoGroupedArrowPythonRunner.scala @@ -27,7 +27,6 @@ import com.nvidia.spark.rapids.GpuSemaphore import org.apache.spark.{SparkEnv, TaskContext} import org.apache.spark.api.python.{ChainedPythonFunctions, PythonRDD, PythonWorker} -import org.apache.spark.sql.execution.python.PythonUDFRunner import org.apache.spark.sql.rapids.execution.python.{GpuArrowWriter, GpuPythonRunnerCommon} import org.apache.spark.sql.types.StructType import org.apache.spark.sql.vectorized.ColumnarBatch @@ -39,7 +38,7 @@ import org.apache.spark.sql.vectorized.ColumnarBatch * and receive it back in JVM as batches of single DataFrame. 
*/ class GpuCoGroupedArrowPythonRunner( - funcs: Seq[ChainedPythonFunctions], + funcs: Seq[(ChainedPythonFunctions, Long)], evalType: Int, argOffsets: Array[Array[Int]], leftSchema: StructType, @@ -49,7 +48,7 @@ class GpuCoGroupedArrowPythonRunner( batchSize: Int, override val pythonOutSchema: StructType, jobArtifactUUID: Option[String] = None) - extends GpuBasePythonRunner[(ColumnarBatch, ColumnarBatch)](funcs, evalType, + extends GpuBasePythonRunner[(ColumnarBatch, ColumnarBatch)](funcs.map(_._1), evalType, argOffsets, jobArtifactUUID) with GpuArrowPythonOutput with GpuPythonRunnerCommon { protected override def newWriter( @@ -67,7 +66,7 @@ class GpuCoGroupedArrowPythonRunner( PythonRDD.writeUTF(k, dataOut) PythonRDD.writeUTF(v, dataOut) } - PythonUDFRunner.writeUDFs(dataOut, funcs, argOffsets) + WritePythonUDFUtils.writeUDFs(dataOut, funcs, argOffsets) } override def writeNextInputToStream(dataOut: DataOutputStream): Boolean = { diff --git a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupUDFArrowPythonRunner.scala b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupUDFArrowPythonRunner.scala index 4393c8b7057..42c6178ff83 100644 --- a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupUDFArrowPythonRunner.scala +++ b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupUDFArrowPythonRunner.scala @@ -28,7 +28,6 @@ import com.nvidia.spark.rapids.GpuSemaphore import org.apache.spark.{SparkEnv, TaskContext} import org.apache.spark.api.python._ -import org.apache.spark.sql.execution.python.PythonUDFRunner import org.apache.spark.sql.rapids.execution.python.{GpuArrowPythonWriter, GpuPythonRunnerCommon} import org.apache.spark.sql.types.StructType import org.apache.spark.sql.vectorized.ColumnarBatch @@ -47,7 +46,7 @@ import org.apache.spark.sql.vectorized.ColumnarBatch * more data being sent. 
*/ class GpuGroupUDFArrowPythonRunner( - funcs: Seq[ChainedPythonFunctions], + funcs: Seq[(ChainedPythonFunctions, Long)], evalType: Int, argOffsets: Array[Array[Int]], pythonInSchema: StructType, @@ -56,8 +55,8 @@ class GpuGroupUDFArrowPythonRunner( batchSize: Long, override val pythonOutSchema: StructType, jobArtifactUUID: Option[String] = None) - extends GpuBasePythonRunner[ColumnarBatch](funcs, evalType, argOffsets, jobArtifactUUID) - with GpuArrowPythonOutput with GpuPythonRunnerCommon { + extends GpuBasePythonRunner[ColumnarBatch](funcs.map(_._1), evalType, argOffsets, + jobArtifactUUID) with GpuArrowPythonOutput with GpuPythonRunnerCommon { protected override def newWriter( env: SparkEnv, @@ -69,7 +68,7 @@ class GpuGroupUDFArrowPythonRunner( val arrowWriter = new GpuArrowPythonWriter(pythonInSchema, batchSize) { override protected def writeUDFs(dataOut: DataOutputStream): Unit = { - PythonUDFRunner.writeUDFs(dataOut, funcs, argOffsets) + WritePythonUDFUtils.writeUDFs(dataOut, funcs, argOffsets) } } diff --git a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupedPythonRunnerFactory.scala b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupedPythonRunnerFactory.scala index b1dabbf5b5e..63a4289c5b0 100644 --- a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupedPythonRunnerFactory.scala +++ b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/execution/python/shims/GpuGroupedPythonRunnerFactory.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.vectorized.ColumnarBatch case class GpuGroupedPythonRunnerFactory( conf: org.apache.spark.sql.internal.SQLConf, - chainedFunc: Seq[ChainedPythonFunctions], + chainedFunc: Seq[(ChainedPythonFunctions, Long)], argOffsets: Array[Array[Int]], dedupAttrs: StructType, pythonOutputSchema: StructType, diff --git a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala index a0ba17f9bd4..9b800d4e51a 100644 --- a/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala +++ b/sql-plugin/src/main/spark341db/scala/org/apache/spark/sql/rapids/shims/RapidsErrorUtils.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
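A pattern repeated across the Python-runner hunks above: each runner's `funcs` parameter changes from `Seq[ChainedPythonFunctions]` to `Seq[(ChainedPythonFunctions, Long)]`, the base runner still receives only the function part via `funcs.map(_._1)`, and UDF serialization goes through `WritePythonUDFUtils.writeUDFs` instead of `PythonUDFRunner.writeUDFs`. The sketch below is a self-contained mirror of that signature change using hypothetical stand-in types.

```scala
// Hypothetical stand-in for ChainedPythonFunctions.
final case class ChainedFuncDemo(name: String)

// The base class keeps its original contract: it only sees the function part.
class BaseRunnerDemo(funcs: Seq[ChainedFuncDemo]) {
  def describeDemo(): String = funcs.map(_.name).mkString(",")
}

// The subclass carries the extra Long alongside each function and strips it for the base.
class RunnerWithIdsDemo(funcs: Seq[(ChainedFuncDemo, Long)])
    extends BaseRunnerDemo(funcs.map(_._1)) {
  // The Long component stays available where the UDFs are written out.
  def writeUdfsDemo(): Seq[String] = funcs.map { case (f, id) => s"${f.name}:$id" }
}
```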
@@ -21,7 +21,7 @@ package org.apache.spark.sql.rapids.shims import org.apache.spark.sql.errors.QueryExecutionErrors -object RapidsErrorUtils extends RapidsErrorUtilsBase { +object RapidsErrorUtils extends RapidsErrorUtilsBase with RapidsQueryErrorUtils { def sqlArrayIndexNotStartAtOneError(): RuntimeException = { QueryExecutionErrors.invalidIndexOfZeroError(context = null) } diff --git a/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/BatchScanExecMeta.scala b/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/BatchScanExecMeta.scala index 4bbc4644241..4b29de25bf0 100644 --- a/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/BatchScanExecMeta.scala +++ b/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/BatchScanExecMeta.scala @@ -17,68 +17,18 @@ /*** spark-rapids-shim-json-lines {"spark": "350"} {"spark": "351"} -{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims import com.nvidia.spark.rapids._ -import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.datasources.v2.BatchScanExec class BatchScanExecMeta(p: BatchScanExec, conf: RapidsConf, parent: Option[RapidsMeta[_, _, _]], rule: DataFromReplacementRule) - extends SparkPlanMeta[BatchScanExec](p, conf, parent, rule) { - // Replaces SubqueryBroadcastExec inside dynamic pruning filters with GPU counterpart - // if possible. Instead regarding filters as childExprs of current Meta, we create - // a new meta for SubqueryBroadcastExec. The reason is that the GPU replacement of - // BatchScanExec is independent from the replacement of the runtime filters. It is - // possible that the BatchScanExec is on the CPU, while the dynamic runtime filters - // are on the GPU. And vice versa. 
- private lazy val runtimeFilters = { - val convertBroadcast = (bc: SubqueryBroadcastExec) => { - val meta = GpuOverrides.wrapAndTagPlan(bc, conf) - meta.tagForExplain() - meta.convertIfNeeded().asInstanceOf[BaseSubqueryExec] - } - wrapped.runtimeFilters.map { filter => - filter.transformDown { - case dpe @ DynamicPruningExpression(inSub: InSubqueryExec) => - inSub.plan match { - case bc: SubqueryBroadcastExec => - dpe.copy(inSub.copy(plan = convertBroadcast(bc))) - case reuse @ ReusedSubqueryExec(bc: SubqueryBroadcastExec) => - dpe.copy(inSub.copy(plan = reuse.copy(convertBroadcast(bc)))) - case _ => - dpe - } - } - } - } - - override val childExprs: Seq[BaseExprMeta[_]] = { - // We want to leave the runtime filters as CPU expressions - p.output.map(GpuOverrides.wrapExpr(_, conf, Some(this))) - } - - override val childScans: scala.Seq[ScanMeta[_]] = - Seq(GpuOverrides.wrapScan(p.scan, conf, Some(this))) - - override def tagPlanForGpu(): Unit = { - if (!p.runtimeFilters.isEmpty && !childScans.head.supportsRuntimeFilters) { - willNotWorkOnGpu("runtime filtering (DPP) is not supported for this scan") - } - } - - override def convertToCpu(): SparkPlan = { - val cpu = wrapped.copy(runtimeFilters = runtimeFilters) - cpu.copyTagsFrom(wrapped) - cpu - } - + extends BatchScanExecMetaBase(p, conf, parent, rule) { override def convertToGpu(): GpuExec = { val spj = p.spjParams GpuBatchScanExec(p.output, childScans.head.convertToGpu(), runtimeFilters, diff --git a/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/BatchScanExecMetaBase.scala b/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/BatchScanExecMetaBase.scala new file mode 100644 index 00000000000..914702a289c --- /dev/null +++ b/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/BatchScanExecMetaBase.scala @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "350"} +{"spark": "351"} +{"spark": "400"} +spark-rapids-shim-json-lines ***/ +package com.nvidia.spark.rapids.shims + +import com.nvidia.spark.rapids._ + +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.execution._ +import org.apache.spark.sql.execution.datasources.v2.BatchScanExec + +abstract class BatchScanExecMetaBase(p: BatchScanExec, + conf: RapidsConf, + parent: Option[RapidsMeta[_, _, _]], + rule: DataFromReplacementRule) + extends SparkPlanMeta[BatchScanExec](p, conf, parent, rule) { + // Replaces SubqueryBroadcastExec inside dynamic pruning filters with GPU counterpart + // if possible. Instead regarding filters as childExprs of current Meta, we create + // a new meta for SubqueryBroadcastExec. The reason is that the GPU replacement of + // BatchScanExec is independent from the replacement of the runtime filters. It is + // possible that the BatchScanExec is on the CPU, while the dynamic runtime filters + // are on the GPU. And vice versa. 
+ protected lazy val runtimeFilters = { + val convertBroadcast = (bc: SubqueryBroadcastExec) => { + val meta = GpuOverrides.wrapAndTagPlan(bc, conf) + meta.tagForExplain() + meta.convertIfNeeded().asInstanceOf[BaseSubqueryExec] + } + wrapped.runtimeFilters.map { filter => + filter.transformDown { + case dpe @ DynamicPruningExpression(inSub: InSubqueryExec) => + inSub.plan match { + case bc: SubqueryBroadcastExec => + dpe.copy(inSub.copy(plan = convertBroadcast(bc))) + case reuse @ ReusedSubqueryExec(bc: SubqueryBroadcastExec) => + dpe.copy(inSub.copy(plan = reuse.copy(convertBroadcast(bc)))) + case _ => + dpe + } + } + } + } + + override val childExprs: Seq[BaseExprMeta[_]] = { + // We want to leave the runtime filters as CPU expressions + p.output.map(GpuOverrides.wrapExpr(_, conf, Some(this))) + } + + override val childScans: scala.Seq[ScanMeta[_]] = + Seq(GpuOverrides.wrapScan(p.scan, conf, Some(this))) + + override def tagPlanForGpu(): Unit = { + if (!p.runtimeFilters.isEmpty && !childScans.head.supportsRuntimeFilters) { + willNotWorkOnGpu("runtime filtering (DPP) is not supported for this scan") + } + } + + override def convertToCpu(): SparkPlan = { + val cpu = wrapped.copy(runtimeFilters = runtimeFilters) + cpu.copyTagsFrom(wrapped) + cpu + } +} diff --git a/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/PartitionedFileUtilsShim.scala b/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/PartitionedFileUtilsShim.scala new file mode 100644 index 00000000000..71ad5ae1a0f --- /dev/null +++ b/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/PartitionedFileUtilsShim.scala @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "350"} +{"spark": "351"} +spark-rapids-shim-json-lines ***/ +package com.nvidia.spark.rapids.shims + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.execution.PartitionedFileUtil +import org.apache.spark.sql.execution.datasources.{FileStatusWithMetadata, PartitionedFile} + +object PartitionedFileUtilsShim extends PartitionedFileUtilsShimBase { + // In Spark 4.0, PartitionedFileUtil.splitFiles lost its `sparkSession` parameter. + // This pre-Spark-4.0 shim keeps the `sparkSession` parameter. 
+ def splitFiles(sparkSession: SparkSession, + file: FileStatusWithMetadata, + isSplitable: Boolean, + maxSplitBytes: Long, + partitionValues: InternalRow): Seq[PartitionedFile] = { + PartitionedFileUtil.splitFiles(sparkSession, file, isSplitable, maxSplitBytes, partitionValues) + } +} diff --git a/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/PythonMapInArrowExecShims.scala b/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/PythonMapInArrowExecShims.scala index 833767558c6..8f9bc5c1573 100644 --- a/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/PythonMapInArrowExecShims.scala +++ b/sql-plugin/src/main/spark350/scala/com/nvidia/spark/rapids/shims/PythonMapInArrowExecShims.scala @@ -17,7 +17,6 @@ /*** spark-rapids-shim-json-lines {"spark": "350"} {"spark": "351"} -{"spark": "400"} spark-rapids-shim-json-lines ***/ package com.nvidia.spark.rapids.shims diff --git a/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/shims/GpuPythonMapInArrowExecMeta.scala b/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/shims/GpuPythonMapInArrowExecMeta.scala index a08211f3795..c27f4824c4a 100644 --- a/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/shims/GpuPythonMapInArrowExecMeta.scala +++ b/sql-plugin/src/main/spark350/scala/org/apache/spark/sql/rapids/shims/GpuPythonMapInArrowExecMeta.scala @@ -17,7 +17,6 @@ /*** spark-rapids-shim-json-lines {"spark": "350"} {"spark": "351"} -{"spark": "400"} spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.shims diff --git a/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/shims/BatchScanExecMeta.scala b/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/shims/BatchScanExecMeta.scala new file mode 100644 index 00000000000..e6c26eb65b8 --- /dev/null +++ b/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/shims/BatchScanExecMeta.scala @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/*** spark-rapids-shim-json-lines +{"spark": "400"} +spark-rapids-shim-json-lines ***/ +package com.nvidia.spark.rapids.shims + +import com.nvidia.spark.rapids._ + +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.execution._ +import org.apache.spark.sql.execution.datasources.v2.BatchScanExec + +class BatchScanExecMeta(p: BatchScanExec, + conf: RapidsConf, + parent: Option[RapidsMeta[_, _, _]], + rule: DataFromReplacementRule) + extends BatchScanExecMetaBase(p, conf, parent, rule) { + override def convertToGpu(): GpuExec = { + val spj = p.spjParams + GpuBatchScanExec(p.output, childScans.head.convertToGpu(), runtimeFilters, + p.ordering, p.table, spj) + } +} diff --git a/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/shims/CudfUnsafeRow.scala b/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/shims/CudfUnsafeRow.scala new file mode 100644 index 00000000000..623005654fc --- /dev/null +++ b/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/shims/CudfUnsafeRow.scala @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/*** spark-rapids-shim-json-lines +{"spark": "400"} +spark-rapids-shim-json-lines ***/ +package com.nvidia.spark.rapids.shims + +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.unsafe.types.VariantVal + + +final class CudfUnsafeRow( + attributes: Array[Attribute], + remapping: Array[Int]) extends CudfUnsafeRowBase(attributes, remapping) { + def getVariant(ordinal: Int) = { + throw new UnsupportedOperationException("VariantVal is not supported") + } +} + +object CudfUnsafeRow extends CudfUnsafeRowTrait \ No newline at end of file diff --git a/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/shims/GpuBatchScanExec.scala b/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/shims/GpuBatchScanExec.scala new file mode 100644 index 00000000000..4fc62d82df3 --- /dev/null +++ b/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/shims/GpuBatchScanExec.scala @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
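The new Spark 4.0 `BatchScanExecMeta` above extends the `BatchScanExecMetaBase` introduced earlier in this patch, so the version-specific shim is reduced to the `convertToGpu` override while tagging, runtime-filter handling, and CPU fallback stay in the shared base. A minimal mirror of that base/subclass split follows; the names are hypothetical, not the real `BatchScanExecMetaBase`/`GpuBatchScanExec` API.

```scala
// Shared base owns the common logic; each Spark build only overrides the conversion step.
abstract class ScanMetaBaseDemo(scanName: String) {
  def tagDemo(): Unit = println(s"tagging $scanName") // shared tagging logic lives here
  def convertDemo(): String                           // the version-specific piece
}

class ScanMeta400Demo(scanName: String) extends ScanMetaBaseDemo(scanName) {
  override def convertDemo(): String = s"GpuScan($scanName)" // only what differs per version
}
```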
+ */ + +/*** spark-rapids-shim-json-lines +{"spark": "400"} +spark-rapids-shim-json-lines ***/ +package com.nvidia.spark.rapids.shims + +import com.google.common.base.Objects +import com.nvidia.spark.rapids.GpuScan + +import org.apache.spark.SparkException +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, DynamicPruningExpression, Expression, Literal, RowOrdering, SortOrder} +import org.apache.spark.sql.catalyst.plans.QueryPlan +import org.apache.spark.sql.catalyst.plans.physical.{KeyGroupedPartitioning, KeyGroupedShuffleSpec, Partitioning, SinglePartition} +import org.apache.spark.sql.catalyst.util.{truncatedString, InternalRowComparableWrapper} +import org.apache.spark.sql.connector.catalog.Table +import org.apache.spark.sql.connector.read._ +import org.apache.spark.sql.execution.datasources.rapids.DataSourceStrategyUtils +import org.apache.spark.sql.execution.datasources.v2.{DataSourceRDD, StoragePartitionJoinParams} +import org.apache.spark.sql.internal.SQLConf + +case class GpuBatchScanExec( + output: Seq[AttributeReference], + @transient scan: GpuScan, + runtimeFilters: Seq[Expression] = Seq.empty, + ordering: Option[Seq[SortOrder]] = None, + @transient table: Table, + spjParams: StoragePartitionJoinParams = StoragePartitionJoinParams() + ) extends GpuBatchScanExecBase(scan, runtimeFilters) { + + @transient override lazy val batch: Batch = if (scan == null) null else scan.toBatch + // TODO: unify the equal/hashCode implementation for all data source v2 query plans. + override def equals(other: Any): Boolean = other match { + case other: GpuBatchScanExec => + this.batch != null && this.batch == other.batch && + this.runtimeFilters == other.runtimeFilters && + this.spjParams == other.spjParams + case _ => + false + } + + override def hashCode(): Int = Objects.hashCode(batch, runtimeFilters) + + @transient override lazy val inputPartitions: Seq[InputPartition] = + batch.planInputPartitions() + + @transient override protected lazy val filteredPartitions: Seq[Seq[InputPartition]] = { + val dataSourceFilters = runtimeFilters.flatMap { + case DynamicPruningExpression(e) => DataSourceStrategyUtils.translateRuntimeFilter(e) + case _ => None + } + + if (dataSourceFilters.nonEmpty) { + val originalPartitioning = outputPartitioning + + // the cast is safe as runtime filters are only assigned if the scan can be filtered + val filterableScan = scan.asInstanceOf[SupportsRuntimeV2Filtering] + filterableScan.filter(dataSourceFilters.toArray) + + // call toBatch again to get filtered partitions + val newPartitions = scan.toBatch.planInputPartitions() + + originalPartitioning match { + case p: KeyGroupedPartitioning => + if (newPartitions.exists(!_.isInstanceOf[HasPartitionKey])) { + throw new SparkException("Data source must have preserved the original partitioning " + + "during runtime filtering: not all partitions implement HasPartitionKey after " + + "filtering") + } + + val newPartitionValues = newPartitions.map(partition => + InternalRowComparableWrapper(partition.asInstanceOf[HasPartitionKey], p.expressions)) + .toSet + val oldPartitionValues = p.partitionValues + .map(partition => InternalRowComparableWrapper(partition, p.expressions)).toSet + // We require the new number of partition values to be equal or less than the old number + // of partition values here. 
In the case of less than, empty partitions will be added for + // those missing values that are not present in the new input partitions. + if (oldPartitionValues.size < newPartitionValues.size) { + throw new SparkException("During runtime filtering, data source must either report " + + "the same number of partition values, or a subset of partition values from the " + + s"original. Before: ${oldPartitionValues.size} partition values. " + + s"After: ${newPartitionValues.size} partition values") + } + + if (!newPartitionValues.forall(oldPartitionValues.contains)) { + throw new SparkException("During runtime filtering, data source must not report new " + + "partition values that are not present in the original partitioning.") + } + groupPartitions(newPartitions) + .map(_.groupedParts.map(_.parts)).getOrElse(Seq.empty) + + case _ => + // no validation is needed as the data source did not report any specific partitioning + newPartitions.map(Seq(_)) + } + + } else { + partitions + } + } + + override def outputPartitioning: Partitioning = { + super.outputPartitioning match { + case k: KeyGroupedPartitioning if spjParams.commonPartitionValues.isDefined => + // We allow duplicated partition values if + // `spark.sql.sources.v2.bucketing.partiallyClusteredDistribution.enabled` is true + val newPartValues = spjParams.commonPartitionValues.get.flatMap { + case (partValue, numSplits) => Seq.fill(numSplits)(partValue) + } + val expressions = spjParams.joinKeyPositions match { + case Some(projectionPositions) => projectionPositions.map(i => k.expressions(i)) + case _ => k.expressions + } + k.copy(expressions = expressions, numPartitions = newPartValues.length, + partitionValues = newPartValues) + case p => p + } + } + + override lazy val readerFactory: PartitionReaderFactory = batch.createReaderFactory() + + override lazy val inputRDD: RDD[InternalRow] = { + val rdd = if (filteredPartitions.isEmpty && outputPartitioning == SinglePartition) { + // return an empty RDD with 1 partition if dynamic filtering removed the only split + sparkContext.parallelize(Array.empty[InternalRow], 1) + } else { + val finalPartitions = outputPartitioning match { + case p: KeyGroupedPartitioning => + assert(spjParams.keyGroupedPartitioning.isDefined) + val expressions = spjParams.keyGroupedPartitioning.get + + // Re-group the input partitions if we are projecting on a subset of join keys + val (groupedPartitions, partExpressions) = spjParams.joinKeyPositions match { + case Some(projectPositions) => + val projectedExpressions = projectPositions.map(i => expressions(i)) + val parts = filteredPartitions.flatten.groupBy(part => { + val row = part.asInstanceOf[HasPartitionKey].partitionKey() + val projectedRow = KeyGroupedPartitioning.project( + expressions, projectPositions, row) + InternalRowComparableWrapper(projectedRow, projectedExpressions) + }).map { case (wrapper, splits) => (wrapper.row, splits) }.toSeq + (parts, projectedExpressions) + case _ => + val groupedParts = filteredPartitions.map(splits => { + assert(splits.nonEmpty && splits.head.isInstanceOf[HasPartitionKey]) + (splits.head.asInstanceOf[HasPartitionKey].partitionKey(), splits) + }) + (groupedParts, expressions) + } + + // Also re-group the partitions if we are reducing compatible partition expressions + val finalGroupedPartitions = spjParams.reducers match { + case Some(reducers) => + val result = groupedPartitions.groupBy { case (row, _) => + KeyGroupedShuffleSpec.reducePartitionValue(row, partExpressions, reducers) + }.map { case (wrapper, splits) => 
(wrapper.row, splits.flatMap(_._2)) }.toSeq + val rowOrdering = RowOrdering.createNaturalAscendingOrdering( + partExpressions.map(_.dataType)) + result.sorted(rowOrdering.on((t: (InternalRow, _)) => t._1)) + case _ => groupedPartitions + } + + // When partially clustered, the input partitions are not grouped by partition + // values. Here we'll need to check `commonPartitionValues` and decide how to group + // and replicate splits within a partition. + if (spjParams.commonPartitionValues.isDefined && spjParams.applyPartialClustering) { + // A mapping from the common partition values to how many splits the partition + // should contain. + val commonPartValuesMap = spjParams.commonPartitionValues + .get + .map(t => (InternalRowComparableWrapper(t._1, partExpressions), t._2)) + .toMap + val nestGroupedPartitions = finalGroupedPartitions.map { case (partValue, splits) => + // `commonPartValuesMap` should contain the part value since it's the super set. + val numSplits = commonPartValuesMap + .get(InternalRowComparableWrapper(partValue, partExpressions)) + assert(numSplits.isDefined, s"Partition value $partValue does not exist in " + + "common partition values from Spark plan") + + val newSplits = if (spjParams.replicatePartitions) { + // We need to also replicate partitions according to the other side of join + Seq.fill(numSplits.get)(splits) + } else { + // Not grouping by partition values: this could be the side with partially + // clustered distribution. Because of dynamic filtering, we'll need to check if + // the final number of splits of a partition is smaller than the original + // number, and fill with empty splits if so. This is necessary so that both + // sides of a join will have the same number of partitions & splits. + splits.map(Seq(_)).padTo(numSplits.get, Seq.empty) + } + (InternalRowComparableWrapper(partValue, partExpressions), newSplits) + } + + // Now fill missing partition keys with empty partitions + val partitionMapping = nestGroupedPartitions.toMap + spjParams.commonPartitionValues.get.flatMap { + case (partValue, numSplits) => + // Use empty partition for those partition values that are not present. + partitionMapping.getOrElse( + InternalRowComparableWrapper(partValue, partExpressions), + Seq.fill(numSplits)(Seq.empty)) + } + } else { + // either `commonPartitionValues` is not defined, or it is defined but + // `applyPartialClustering` is false. + val partitionMapping = finalGroupedPartitions.map { case (partValue, splits) => + InternalRowComparableWrapper(partValue, partExpressions) -> splits + }.toMap + + // In case `commonPartitionValues` is not defined (e.g., SPJ is not used), there + // could exist duplicated partition values, as partition grouping is not done + // at the beginning and postponed to this method. It is important to use unique + // partition values here so that grouped partitions won't get duplicated. 
+ p.uniquePartitionValues.map { partValue => + // Use empty partition for those partition values that are not present + partitionMapping.getOrElse( + InternalRowComparableWrapper(partValue, partExpressions), Seq.empty) + } + } + + case _ => filteredPartitions + } + + new DataSourceRDD( + sparkContext, finalPartitions, readerFactory, supportsColumnar, customMetrics) + } + postDriverMetrics() + rdd + } + + override def keyGroupedPartitioning: Option[Seq[Expression]] = + spjParams.keyGroupedPartitioning + + override def doCanonicalize(): GpuBatchScanExec = { + this.copy( + output = output.map(QueryPlan.normalizeExpressions(_, output)), + runtimeFilters = QueryPlan.normalizePredicates( + runtimeFilters.filterNot(_ == DynamicPruningExpression(Literal.TrueLiteral)), + output)) + } + + override def simpleString(maxFields: Int): String = { + val truncatedOutputString = truncatedString(output, "[", ", ", "]", maxFields) + val runtimeFiltersString = s"RuntimeFilters: ${runtimeFilters.mkString("[", ",", "]")}" + val result = s"$nodeName$truncatedOutputString ${scan.description()} $runtimeFiltersString" + redact(result) + } + + override def nodeName: String = { + s"GpuBatchScan ${table.name()}".trim + } +} diff --git a/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/shims/GpuOrcDataReader.scala b/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/shims/GpuOrcDataReader.scala new file mode 100644 index 00000000000..8c6a9c793f2 --- /dev/null +++ b/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/shims/GpuOrcDataReader.scala @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/*** spark-rapids-shim-json-lines +{"spark": "400"} +spark-rapids-shim-json-lines ***/ +package com.nvidia.spark.rapids.shims + +import com.nvidia.spark.rapids.GpuMetric +import org.apache.hadoop.conf.Configuration +import org.apache.orc.impl.DataReaderProperties + +class GpuOrcDataReader( + props: DataReaderProperties, + conf: Configuration, + metrics: Map[String, GpuMetric]) extends GpuOrcDataReader320Plus(props, conf, metrics) { + override def releaseAllBuffers(): Unit = { + throw new IllegalStateException("should not be trying to release buffers") + } +} + + +object GpuOrcDataReader { + // File cache is being used, so we want read ranges that can be cached separately + val shouldMergeDiskRanges: Boolean = false +} diff --git a/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/shims/MapInArrowExecShims.scala b/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/shims/MapInArrowExecShims.scala new file mode 100644 index 00000000000..4a1998fa88d --- /dev/null +++ b/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/shims/MapInArrowExecShims.scala @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "400"} +spark-rapids-shim-json-lines ***/ +package com.nvidia.spark.rapids.shims + +import com.nvidia.spark.rapids._ + +import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.python.MapInArrowExec +import org.apache.spark.sql.rapids.shims.GpuMapInArrowExecMeta + +object PythonMapInArrowExecShims { + + def execs: Map[Class[_ <: SparkPlan], ExecRule[_ <: SparkPlan]] = Seq( + GpuOverrides.exec[MapInArrowExec]( + "The backend for Map Arrow Iterator UDF. Accelerates the data transfer between the" + + " Java process and the Python process. It also supports scheduling GPU resources" + + " for the Python process when enabled.", + ExecChecks((TypeSig.commonCudfTypes + TypeSig.ARRAY + TypeSig.STRUCT).nested(), + TypeSig.all), + (mapPy, conf, p, r) => new GpuMapInArrowExecMeta(mapPy, conf, p, r)) + ).map(r => (r.getClassFor.asSubclass(classOf[SparkPlan]), r)).toMap + +} \ No newline at end of file diff --git a/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/shims/PartitionedFileUtilsShim.scala b/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/shims/PartitionedFileUtilsShim.scala new file mode 100644 index 00000000000..de8e98962a7 --- /dev/null +++ b/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/shims/PartitionedFileUtilsShim.scala @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "400"} +spark-rapids-shim-json-lines ***/ +package com.nvidia.spark.rapids.shims + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.execution.PartitionedFileUtil +import org.apache.spark.sql.execution.datasources.{FileStatusWithMetadata, PartitionedFile} + +object PartitionedFileUtilsShim extends PartitionedFileUtilsShimBase { + + // In Spark 4.0, PartitionedFileUtil.splitFiles lost its `sparkSession` parameter. + // This Spark-4.0+ shim ignores the `sparkSession` parameter. 
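+  // Keeping `sparkSession` in the shim signature lets call sites stay version-agnostic; +  // only the delegation below drops it when calling the Spark 4.0 API.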
+ def splitFiles(sparkSession: SparkSession, + file: FileStatusWithMetadata, + isSplitable: Boolean, + maxSplitBytes: Long, + partitionValues: InternalRow): Seq[PartitionedFile] = { + PartitionedFileUtil.splitFiles(file, isSplitable, maxSplitBytes, partitionValues) + } + +} // object PartitionFileUtilsShim; diff --git a/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/shims/RaiseErrorShim.scala b/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/shims/RaiseErrorShim.scala new file mode 100644 index 00000000000..70d40fc19a0 --- /dev/null +++ b/sql-plugin/src/main/spark400/scala/com/nvidia/spark/rapids/shims/RaiseErrorShim.scala @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/*** spark-rapids-shim-json-lines +{"spark": "400"} +spark-rapids-shim-json-lines ***/ +package com.nvidia.spark.rapids.shims + +import com.nvidia.spark.rapids.{ExprRule, GpuOverrides} +import com.nvidia.spark.rapids.{ExprChecks, GpuExpression, TypeSig, UnaryExprMeta} + +import org.apache.spark.sql.catalyst.expressions.{Expression, RaiseError} + +object RaiseErrorShim { + val exprs: Map[Class[_ <: Expression], ExprRule[_ <: Expression]] = Map.empty +} diff --git a/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/hive/rapids/shims/CommandUtilsShim.scala b/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/hive/rapids/shims/CommandUtilsShim.scala new file mode 100644 index 00000000000..f5858e4cfd6 --- /dev/null +++ b/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/hive/rapids/shims/CommandUtilsShim.scala @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "400"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.hive.rapids.shims + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.execution.command.CommandUtils + +object CommandUtilsShim { + + // Shim for CommandUtils.uncacheTableOrView, whose signature changed in Apache Spark 4.0. 
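+  // Callers go through this shim so the version-specific call is kept in one place.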
+ def uncacheTableOrView(sparkSession: SparkSession, tableId: TableIdentifier): Unit = { + CommandUtils.uncacheTableOrView(sparkSession, tableId) + } + +} \ No newline at end of file diff --git a/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/execution/python/shims/WritePythonUDFUtils.scala b/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/execution/python/shims/WritePythonUDFUtils.scala new file mode 100644 index 00000000000..4650d998fd7 --- /dev/null +++ b/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/execution/python/shims/WritePythonUDFUtils.scala @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/*** spark-rapids-shim-json-lines +{"spark": "400"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.rapids.execution.python.shims + +import java.io.DataOutputStream + +import org.apache.spark.api.python.ChainedPythonFunctions +import org.apache.spark.sql.execution.python.PythonUDFRunner + +object WritePythonUDFUtils { + def writeUDFs( + dataOut: DataOutputStream, + funcs: Seq[(ChainedPythonFunctions, Long)], + argOffsets: Array[Array[Int]], + profiler: Option[String] = None): Unit = { + PythonUDFRunner.writeUDFs(dataOut, funcs, argOffsets, profiler) + } +} diff --git a/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/GpuMapInArrowExecMeta.scala b/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/GpuMapInArrowExecMeta.scala new file mode 100644 index 00000000000..f7010099813 --- /dev/null +++ b/sql-plugin/src/main/spark400/scala/org/apache/spark/sql/rapids/shims/GpuMapInArrowExecMeta.scala @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/*** spark-rapids-shim-json-lines +{"spark": "400"} +spark-rapids-shim-json-lines ***/ +package org.apache.spark.sql.rapids.shims + +import com.nvidia.spark.rapids._ + +import org.apache.spark.api.python.PythonEvalType +import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, PythonUDF} +import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.python.MapInArrowExec +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.rapids.execution.TrampolineUtil +import org.apache.spark.sql.rapids.execution.python.GpuMapInBatchExec +import org.apache.spark.sql.types.{BinaryType, StringType} + +class GpuMapInArrowExecMeta( +    mapArrow: MapInArrowExec, +    conf: RapidsConf, +    parent: Option[RapidsMeta[_, _, _]], +    rule: DataFromReplacementRule) +  extends SparkPlanMeta[MapInArrowExec](mapArrow, conf, parent, rule) { +  override def replaceMessage: String = "partially run on GPU" + +  override def noReplacementPossibleMessage(reasons: String): String = +    s"cannot run even partially on the GPU because $reasons" + +  protected val udf: BaseExprMeta[PythonUDF] = GpuOverrides.wrapExpr( +    mapArrow.func.asInstanceOf[PythonUDF], conf, Some(this)) +  protected val resultAttrs: Seq[BaseExprMeta[Attribute]] = +    mapArrow.output.map(GpuOverrides.wrapExpr(_, conf, Some(this))) + +  override val childExprs: Seq[BaseExprMeta[_]] = resultAttrs :+ udf + +  override def tagPlanForGpu(): Unit = { +    super.tagPlanForGpu() +    if (SQLConf.get.getConf(SQLConf.ARROW_EXECUTION_USE_LARGE_VAR_TYPES)) { + +      val inputTypes = mapArrow.child.schema.fields.map(_.dataType) +      val outputTypes = mapArrow.output.map(_.dataType) + +      val hasStringOrBinaryTypes = (inputTypes ++ outputTypes).exists(dataType => +        TrampolineUtil.dataTypeExistsRecursively(dataType, +          dt => dt == StringType || dt == BinaryType)) + +      if (hasStringOrBinaryTypes) { +        willNotWorkOnGpu(s"${SQLConf.ARROW_EXECUTION_USE_LARGE_VAR_TYPES.key} is " + +          s"enabled and the schema contains string or binary types. This is not " + +          s"supported on the GPU.") +      } +    } +  } + +  override def convertToGpu(): GpuExec = +    GpuMapInArrowExec( +      udf.convertToGpu(), +      resultAttrs.map(_.convertToGpu()).asInstanceOf[Seq[Attribute]], +      childPlans.head.convertIfNeeded(), +      isBarrier = mapArrow.isBarrier, +    ) +} + +/* + * A relation produced by applying a function that takes an iterator of PyArrow's record + * batches and outputs an iterator of PyArrow's record batches. + * + * This GpuMapInArrowExec aims at accelerating the data transfer between + * JVM and Python, and scheduling GPU resources for its Python processes.
+ * + */ +case class GpuMapInArrowExec( + func: Expression, + output: Seq[Attribute], + child: SparkPlan, + override val isBarrier: Boolean) extends GpuMapInBatchExec { + + override protected val pythonEvalType: Int = PythonEvalType.SQL_MAP_ARROW_ITER_UDF +} diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/RegularExpressionRewriteSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/RegularExpressionRewriteSuite.scala index a9ef6364aac..7626c1450c1 100644 --- a/tests/src/test/scala/com/nvidia/spark/rapids/RegularExpressionRewriteSuite.scala +++ b/tests/src/test/scala/com/nvidia/spark/rapids/RegularExpressionRewriteSuite.scala @@ -40,9 +40,9 @@ class RegularExpressionRewriteSuite extends AnyFunSuite { test("regex rewrite contains") { import RegexOptimizationType._ val patterns = Seq(".*abc.*", ".*(abc).*", "^.*(abc).*$", "^.*(.*)(abc).*.*", - raw".*\w.*\Z", raw".*..*\Z") - val excepted = Seq(Contains("abc"), Contains("abc"), NoOptimization, Contains("abc"), - NoOptimization, NoOptimization) + raw".*\w.*\Z", raw".*..*\Z", "^(.*)(abc)") + val excepted = Seq(Contains("abc"), Contains("abc"), NoOptimization, NoOptimization, + NoOptimization, NoOptimization, NoOptimization) verifyRewritePattern(patterns, excepted) } @@ -53,12 +53,42 @@ class RegularExpressionRewriteSuite extends AnyFunSuite { "(.*)abc[0-9a-z]{1,3}(.*)", "(.*)abc[0-9]{2}.*", "^abc[0-9]{1,3}", - "火花急流[\u4e00-\u9fa5]{1}") - val excepted = Seq(PrefixRange("abc", 1, 48, 57), - NoOptimization, - PrefixRange("abc", 2, 48, 57), + "火花急流[\u4e00-\u9fa5]{1}", + "^[0-9]{6}", + "^[0-9]{3,10}", + "^.*[0-9]{6}", + "^(.*)[0-9]{3,10}" + ) + val excepted = Seq( PrefixRange("abc", 1, 48, 57), - PrefixRange("火花急流", 1, 19968, 40869)) + NoOptimization, // prefix followed by a multi-range not supported + PrefixRange("abc", 2, 48, 57), + NoOptimization, // starts with PrefixRange not supported + PrefixRange("火花急流", 1, 19968, 40869), + NoOptimization, // starts with PrefixRange not supported + NoOptimization, // starts with PrefixRange not supported + NoOptimization, // .* can't match line break so can't be optimized + NoOptimization // .* can't match line break so can't be optimized + ) + verifyRewritePattern(patterns, excepted) + } + + test("regex rewrite multiple contains") { + import RegexOptimizationType._ + val patterns = Seq( + "(abc|def).*", + ".*(abc|def|ghi).*", + "((abc)|(def))", + "(abc)|(def)", + "(火花|急流)" + ) + val excepted = Seq( + MultipleContains(Seq("abc", "def")), + MultipleContains(Seq("abc", "def", "ghi")), + MultipleContains(Seq("abc", "def")), + MultipleContains(Seq("abc", "def")), + MultipleContains(Seq("火花", "急流")) + ) verifyRewritePattern(patterns, excepted) } } diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/GpuInSubqueryExecSuite.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/GpuInSubqueryExecSuite.scala index 82ce1073e13..a606dba0572 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/GpuInSubqueryExecSuite.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/GpuInSubqueryExecSuite.scala @@ -65,7 +65,7 @@ class GpuInSubqueryExecSuite extends SparkQueryCompareTestSuite { private def buildCpuInSubqueryPlan( spark: SparkSession, - shouldBroadcast: Boolean): SparkPlan = { + shouldBroadcastOrDpp: Boolean): SparkPlan = { val df1ReadExec = readToPhysicalPlan(nullableStringsIntsDf(spark)) val df2ReadExec = readToPhysicalPlan(subqueryTable(spark)) val inSubquery = InSubqueryExec( @@ -73,16 +73,19 @@ class GpuInSubqueryExecSuite extends 
SparkQueryCompareTestSuite { SubqueryExec("sbe", ProjectExec(Seq(df2ReadExec.output.head), df2ReadExec)), ExprId(7), - shouldBroadcast=shouldBroadcast) + shouldBroadcastOrDpp) FilterExec(DynamicPruningExpression(inSubquery), df1ReadExec) } - for (shouldBroadcast <- Seq(false, true)) { - test(s"InSubqueryExec shouldBroadcast=$shouldBroadcast") { + /** + * The named parameter shouldBroadcast was renamed to isDynamicPruning in Spark 4.0.0+ + */ + for (shouldBroadcastOrDpp <- Seq(false, true)) { + test(s"InSubqueryExec shouldBroadcastOrDpp=$shouldBroadcastOrDpp") { val gpuResults = withGpuSparkSession({ spark => val overrides = new GpuOverrides() val transitionOverrides = new GpuTransitionOverrides() - val cpuPlan = buildCpuInSubqueryPlan(spark, shouldBroadcast) + val cpuPlan = buildCpuInSubqueryPlan(spark, shouldBroadcastOrDpp) val gpuPlan = transitionOverrides(overrides(cpuPlan)) gpuPlan.execute().collect() }) diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsDataFrameAggregateSuite.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsDataFrameAggregateSuite.scala index 5a394a5b0e8..dba811c073c 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsDataFrameAggregateSuite.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsDataFrameAggregateSuite.scala @@ -19,12 +19,67 @@ spark-rapids-shim-json-lines ***/ package org.apache.spark.sql.rapids.suites -import org.apache.spark.sql.DataFrameAggregateSuite +import org.apache.spark.sql.{DataFrameAggregateSuite, Row} +import org.apache.spark.sql.functions._ import org.apache.spark.sql.rapids.utils.RapidsSQLTestsTrait +import org.apache.spark.sql.types._ class RapidsDataFrameAggregateSuite extends DataFrameAggregateSuite with RapidsSQLTestsTrait { - // example to show how to replace the logic of an excluded test case in Vanilla Spark - testRapids("collect functions" ) { // "collect functions" was excluded at RapidsTestSettings - // println("...") + import testImplicits._ + + testRapids("collect functions") { + val df = Seq((1, 2), (2, 2), (3, 4)).toDF("a", "b") + checkAnswer( + df.select(sort_array(collect_list($"a")), sort_array(collect_list($"b"))), + Seq(Row(Seq(1, 2, 3), Seq(2, 2, 4))) + ) + checkAnswer( + df.select(sort_array(collect_set($"a")), sort_array(collect_set($"b"))), + Seq(Row(Seq(1, 2, 3), Seq(2, 4))) + ) + + checkDataset( + df.select(sort_array(collect_set($"a")).as("aSet")).as[Set[Int]], + Set(1, 2, 3)) + checkDataset( + df.select(sort_array(collect_set($"b")).as("bSet")).as[Set[Int]], + Set(2, 4)) + checkDataset( + df.select(sort_array(collect_set($"a")), sort_array(collect_set($"b"))) + .as[(Set[Int], Set[Int])], Seq(Set(1, 2, 3) -> Set(2, 4)): _*) + } + + testRapids("collect functions structs") { + val df = Seq((1, 2, 2), (2, 2, 2), (3, 4, 1)) + .toDF("a", "x", "y") + .select($"a", struct($"x", $"y").as("b")) + checkAnswer( + df.select(sort_array(collect_list($"a")), sort_array(collect_list($"b"))), + Seq(Row(Seq(1, 2, 3), Seq(Row(2, 2), Row(2, 2), Row(4, 1)))) + ) + checkAnswer( + df.select(sort_array(collect_set($"a")), sort_array(collect_set($"b"))), + Seq(Row(Seq(1, 2, 3), Seq(Row(2, 2), Row(4, 1)))) + ) + } + + testRapids("SPARK-17641: collect functions should not collect null values") { + val df = Seq(("1", 2), (null, 2), ("1", 4)).toDF("a", "b") + checkAnswer( + df.select(sort_array(collect_list($"a")), sort_array(collect_list($"b"))), + Seq(Row(Seq("1", "1"), Seq(2, 2, 4))) + ) + checkAnswer( + 
df.select(sort_array(collect_set($"a")), sort_array(collect_set($"b"))), + Seq(Row(Seq("1"), Seq(2, 4))) + ) + } + + testRapids("collect functions should be able to cast to array type with no null values") { + val df = Seq(1, 2).toDF("a") + checkAnswer(df.select(sort_array(collect_list("a")) cast ArrayType(IntegerType, false)), + Seq(Row(Seq(1, 2)))) + checkAnswer(df.select(sort_array(collect_set("a")) cast ArrayType(FloatType, false)), + Seq(Row(Seq(1.0, 2.0)))) } } diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonSuite.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonSuite.scala index 3e9f685dfdc..ef9ae630dfd 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonSuite.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/suites/RapidsJsonSuite.scala @@ -31,10 +31,6 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap class RapidsJsonSuite extends JsonSuite with RapidsSQLTestsBaseTrait with RapidsJsonConfTrait { - /** Returns full path to the given file in the resource folder */ - override protected def testFile(fileName: String): String = { - getWorkspaceFilePath("sql", "core", "src", "test", "resources").toString + "/" + fileName - } } class RapidsJsonV1Suite extends RapidsJsonSuite with RapidsSQLTestsBaseTrait { diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/BackendTestSettings.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/BackendTestSettings.scala index 83396e977fa..e1aec1ffebc 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/BackendTestSettings.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/BackendTestSettings.scala @@ -83,6 +83,7 @@ abstract class BackendTestSettings { // or a description like "This simply can't work on GPU". // It should never be "unknown" or "need investigation" case class KNOWN_ISSUE(reason: String) extends ExcludeReason + case class ADJUST_UT(reason: String) extends ExcludeReason case class WONT_FIX_ISSUE(reason: String) extends ExcludeReason diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsSQLTestsBaseTrait.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsSQLTestsBaseTrait.scala index 6db9e8b71a6..f8b9d21d169 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsSQLTestsBaseTrait.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsSQLTestsBaseTrait.scala @@ -21,27 +21,36 @@ package org.apache.spark.sql.rapids.utils import java.util.{Locale, TimeZone} +import org.apache.hadoop.fs.FileUtil import org.scalactic.source.Position import org.scalatest.Tag import org.apache.spark.SparkConf +import org.apache.spark.internal.Logging import org.apache.spark.internal.config.Tests.IS_TESTING import org.apache.spark.sql.DataFrame import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, ShuffleQueryStageExec} +import org.apache.spark.sql.rapids.execution.TrampolineUtil import org.apache.spark.sql.rapids.utils.RapidsTestConstants.RAPIDS_TEST import org.apache.spark.sql.test.SharedSparkSession /** Basic trait for Rapids SQL test cases. 
*/ trait RapidsSQLTestsBaseTrait extends SharedSparkSession with RapidsTestsBaseTrait { - protected override def afterAll(): Unit = { // SparkFunSuite will set this to true, and forget to reset to false System.clearProperty(IS_TESTING.key) super.afterAll() } + override protected def testFile(fileName: String): String = { + import RapidsSQLTestsBaseTrait.sparkTestResourcesDir + + java.nio.file.Paths.get(sparkTestResourcesDir(getClass).toString, fileName) + .toString + } + protected def testRapids(testName: String, testTag: Tag*)(testFun: => Any)(implicit pos: Position): Unit = { test(RAPIDS_TEST + testName, testTag: _*)(testFun) @@ -107,7 +116,40 @@ trait RapidsSQLTestsBaseTrait extends SharedSparkSession with RapidsTestsBaseTra } } -object RapidsSQLTestsBaseTrait { +object RapidsSQLTestsBaseTrait extends Logging { + private val resourceMap = scala.collection.mutable.Map.empty[String, java.nio.file.Path] + private val testJarUrlRegex = raw"jar:file:(/.*-tests.jar)!.*".r + TrampolineUtil.addShutdownHook(10000, () => { + resourceMap.valuesIterator.foreach { dirPath => + logWarning(s"Deleting expanded test jar dir $dirPath") + FileUtil.fullyDelete(dirPath.toFile) + } + }) + + private def expandJar(jarPath: String): java.nio.file.Path = { + val jarFile = new java.io.File(jarPath) + val destDir = java.nio.file.Files.createTempDirectory(jarFile.getName + ".expanded") + logWarning(s"Registering $destDir for deletion on exit") + FileUtil.unZip(jarFile, destDir.toFile) + destDir + } + + def sparkTestResourcesDir(testClass: Class[_]): java.nio.file.Path = { + var sparkTestClass = testClass + while (sparkTestClass.getName.contains("rapids")) { + sparkTestClass = sparkTestClass.getSuperclass + } + val sparkTestClassResource = "/" + sparkTestClass.getName.replace(".", "/") + ".class" + val resourceURL = sparkTestClass.getResource(sparkTestClassResource).toString + val resourceJar = resourceURL match { + case testJarUrlRegex(testJarPath) => testJarPath + case _ => sys.error(s"Could not extract tests jar path from $resourceURL") + } + this.synchronized { + resourceMap.getOrElseUpdate(resourceJar, expandJar(resourceJar)) + } + } + def nativeSparkConf(origin: SparkConf, warehouse: String): SparkConf = { // Timezone is fixed to UTC to allow timestamps to work by default TimeZone.setDefault(TimeZone.getTimeZone("UTC")) diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala index 3ccd478d368..4cf155041d9 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestSettings.scala @@ -34,12 +34,14 @@ class RapidsTestSettings extends BackendTestSettings { .exclude("SPARK-35735: Take into account day-time interval fields in cast", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10771")) .exclude("casting to fixed-precision decimals", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10771")) .exclude("SPARK-32828: cast from a derived user-defined type to a base type", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10771")) + .exclude("cast string to timestamp", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10771")) + .exclude("cast string to date", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10771")) enableSuite[RapidsDataFrameAggregateSuite] - .exclude("collect functions", 
KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10772")) - .exclude("collect functions structs", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10772")) - .exclude("collect functions should be able to cast to array type with no null values", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10772")) - .exclude("SPARK-17641: collect functions should not collect null values", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10772")) - .exclude("SPARK-19471: AggregationIterator does not initialize the generated result projection before using it", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10772")) + .exclude("collect functions", ADJUST_UT("order of elements in the array is non-deterministic in collect")) + .exclude("collect functions structs", ADJUST_UT("order of elements in the array is non-deterministic in collect")) + .exclude("collect functions should be able to cast to array type with no null values", ADJUST_UT("order of elements in the array is non-deterministic in collect")) + .exclude("SPARK-17641: collect functions should not collect null values", ADJUST_UT("order of elements in the array is non-deterministic in collect")) + .exclude("SPARK-19471: AggregationIterator does not initialize the generated result projection before using it", WONT_FIX_ISSUE("Codegen related UT, not applicable for GPU")) .exclude("SPARK-24788: RelationalGroupedDataset.toString with unresolved exprs should not fail", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10801")) enableSuite[RapidsJsonExpressionsSuite] .exclude("from_json - invalid data", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10849")) @@ -66,20 +68,10 @@ class RapidsTestSettings extends BackendTestSettings { enableSuite[RapidsJsonSuite] .exclude("Casting long as timestamp", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) .exclude("Write timestamps correctly with timestampFormat option and timeZone option", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) - .exclude("SPARK-23723: json in UTF-16 with BOM", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) - .exclude("SPARK-23723: multi-line json in UTF-32BE with BOM", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) - .exclude("SPARK-23723: Use user's encoding in reading of multi-line json in UTF-16LE", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) - .exclude("SPARK-23723: Unsupported encoding name", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) - .exclude("SPARK-23723: checking that the encoding option is case agnostic", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) - .exclude("SPARK-23723: specified encoding is not matched to actual encoding", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) - .exclude("SPARK-23724: lineSep should be set if encoding if different from UTF-8", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) - .exclude("SPARK-31716: inferring should handle malformed input", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) - .exclude("SPARK-24190: restrictions for JSONOptions in read", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) .exclude("exception mode for parsing date/timestamp string", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10773")) enableSuite[RapidsMathFunctionsSuite] enableSuite[RapidsRegexpExpressionsSuite] enableSuite[RapidsStringExpressionsSuite] - 
.exclude("concat", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) .exclude("string substring_index function", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) .exclude("SPARK-22498: Concat should not generate codes beyond 64KB", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) .exclude("SPARK-22549: ConcatWs should not generate codes beyond 64KB", KNOWN_ISSUE("https://github.com/NVIDIA/spark-rapids/issues/10775")) diff --git a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestsTrait.scala b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestsTrait.scala index bcac0b8fe2d..69bd4532c71 100644 --- a/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestsTrait.scala +++ b/tests/src/test/spark330/scala/org/apache/spark/sql/rapids/utils/RapidsTestsTrait.scala @@ -110,6 +110,30 @@ trait RapidsTestsTrait extends RapidsTestsCommonTrait { // .config("spark.rapids.sql.test.enabled", "true") // .config("spark.rapids.sql.test.allowedNonGpu", // "SerializeFromObjectExec,DeserializeToObjectExec,ExternalRDDScanExec") + .config("spark.rapids.sql.castStringToTimestamp.enabled", "true") + .config("spark.rapids.sql.csv.read.decimal.enabled", "true") + .config("spark.rapids.sql.format.avro.enabled", "true") + .config("spark.rapids.sql.format.avro.read.enabled", "true") + .config("spark.rapids.sql.format.hive.text.write.enabled", "true") + .config("spark.rapids.sql.format.json.enabled", "true") + .config("spark.rapids.sql.format.json.read.enabled", "true") + .config("spark.rapids.sql.incompatibleDateFormats.enabled", "true") + .config("spark.rapids.sql.python.gpu.enabled", "true") + .config("spark.rapids.sql.rowBasedUDF.enabled", "true") + .config("spark.rapids.sql.window.collectList.enabled", "true") + .config("spark.rapids.sql.window.collectSet.enabled", "true") + .config("spark.rapids.sql.window.range.byte.enabled", "true") + .config("spark.rapids.sql.window.range.short.enabled", "true") + .config("spark.rapids.sql.expression.Ascii", "true") + .config("spark.rapids.sql.expression.Conv", "true") + .config("spark.rapids.sql.expression.GetJsonObject", "true") + .config("spark.rapids.sql.expression.JsonToStructs", "true") + .config("spark.rapids.sql.expression.JsonTuple", "true") + .config("spark.rapids.sql.expression.StructsToJson", "true") + .config("spark.rapids.sql.exec.CollectLimitExec", "true") + .config("spark.rapids.sql.exec.FlatMapCoGroupsInPandasExec", "true") + .config("spark.rapids.sql.exec.WindowInPandasExec", "true") + .config("spark.rapids.sql.hasExtendedYearValues", "false") .appName("rapids spark plugin running Vanilla Spark UT") _spark = sparkBuilder