From 58b7f4815fe9b756010c53e58eabe6204b6797ea Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Tue, 12 Nov 2024 13:17:14 +0800 Subject: [PATCH] Merge C2C code to main Signed-off-by: Chong Gao --- integration_tests/run_pyspark_from_build.sh | 15 ++ pom.xml | 21 ++ sql-plugin/pom.xml | 28 ++ .../nvidia/spark/rapids/GpuParquetScan.scala | 62 +++-- .../nvidia/spark/rapids/GpuParquetUtils.scala | 29 +- .../spark/rapids/GpuRowToColumnarExec.scala | 11 +- .../com/nvidia/spark/rapids/RapidsConf.scala | 59 ++++ .../velox/VeloxColumnarBatchConverter.scala | 242 +++++++++++++++++ .../velox/VeloxFileSourceScanExec.scala | 233 ++++++++++++++++ .../apache/spark/rapids/velox/VeloxHDFS.scala | 133 +++++++++ .../rapids/velox/VeloxParquetScanRDD.scala | 253 ++++++++++++++++++ .../VeloxFileSourceScanExecMeta.scala | 136 ++++++++++ 12 files changed, 1196 insertions(+), 26 deletions(-) create mode 100644 sql-plugin/src/main/scala/org/apache/spark/rapids/velox/VeloxColumnarBatchConverter.scala create mode 100644 sql-plugin/src/main/scala/org/apache/spark/rapids/velox/VeloxFileSourceScanExec.scala create mode 100644 sql-plugin/src/main/scala/org/apache/spark/rapids/velox/VeloxHDFS.scala create mode 100644 sql-plugin/src/main/scala/org/apache/spark/rapids/velox/VeloxParquetScanRDD.scala create mode 100644 sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/spark320/VeloxFileSourceScanExecMeta.scala diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh index 9bd72b2ada0..b3a474e7565 100755 --- a/integration_tests/run_pyspark_from_build.sh +++ b/integration_tests/run_pyspark_from_build.sh @@ -364,6 +364,21 @@ EOF fi export PYSP_TEST_spark_rapids_memory_gpu_allocSize=${PYSP_TEST_spark_rapids_memory_gpu_allocSize:-'1536m'} + if [[ "$VELOX_TEST" -eq 1 ]]; then + if [ -z "${VELOX_JARS}" ]; then + echo "Error: Environment VELOX_JARS is not set." + exit 1 + fi + export PYSP_TEST_spark_jars="${PYSP_TEST_spark_jars},${VELOX_JARS//:/,}" + export PYSP_TEST_spark_memory_offHeap_enabled=true + export PYSP_TEST_spark_memory_offHeap_size=512M + export PYSP_TEST_spark_gluten_loadLibFromJar=true + export PYSP_TEST_spark_rapids_sql_loadVelox=true + if [[ "$VELOX_HDFS_TEST" -eq 1 ]]; then + export PYSP_TEST_spark_rapids_sql_velox_useVeloxHDFS=true + fi + fi + SPARK_SHELL_SMOKE_TEST="${SPARK_SHELL_SMOKE_TEST:-0}" if [[ "${SPARK_SHELL_SMOKE_TEST}" != "0" ]]; then echo "Running spark-shell smoke test..." diff --git a/pom.xml b/pom.xml index 893af998540..677ec189752 100644 --- a/pom.xml +++ b/pom.xml @@ -825,6 +825,7 @@ false true 0.14.1 + 1.2.0