From 114b93a02c66a8e24d7fdcc2e6f2fabb8dc090fe Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Tue, 24 Dec 2024 17:37:32 +0800 Subject: [PATCH] Check Hybrid jar in executor --- .../spark/rapids/HybridExecutionUtils.scala | 37 +++++++++++++++++++ .../com/nvidia/spark/rapids/Plugin.scala | 5 +++ .../shims/HybridFileSourceScanExecMeta.scala | 19 +--------- 3 files changed, 43 insertions(+), 18 deletions(-) create mode 100644 sql-plugin/src/main/scala/com/nvidia/spark/rapids/HybridExecutionUtils.scala diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/HybridExecutionUtils.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/HybridExecutionUtils.scala new file mode 100644 index 00000000000..319334c77bb --- /dev/null +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/HybridExecutionUtils.scala @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.nvidia.spark.rapids + +object HybridExecutionUtils { + + private val HYBRID_JAR_PLUGIN_CLASS_NAME = "com.nvidia.spark.rapids.hybrid.HybridPluginWrapper" + + /** + * Checks that the Hybrid jar is on the classpath by loading HYBRID_JAR_PLUGIN_CLASS_NAME. + * Throws a RuntimeException wrapping the ClassNotFoundException when the class is missing. + */ + def checkHybridJarInClassPath(): Unit = { + try { + Class.forName(HYBRID_JAR_PLUGIN_CLASS_NAME) + } catch { + case e: ClassNotFoundException => throw new RuntimeException( + "Hybrid jar is not in the classpath, Please add Hybrid jar into the class path, or " + + "Please disable Hybrid feature by setting " + + "spark.rapids.sql.parquet.useHybridReader=false", e) + } + } +} diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala index eff98332e24..62de05961e7 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/Plugin.scala @@ -527,6 +527,11 @@ class RapidsExecutorPlugin extends ExecutorPlugin with Logging { // Fail if there are multiple plugin jars in the classpath. RapidsPluginUtils.detectMultipleJars(conf) + // Check Hybrid jar if needed. + if (conf.useHybridParquetReader) { + HybridExecutionUtils.checkHybridJarInClassPath() + } + // Compare if the cudf version mentioned in the classpath is equal to the version which // plugin expects. If there is a version mismatch, throw error. 
This check can be disabled // by setting this config spark.rapids.cudfVersionOverride=true diff --git a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/HybridFileSourceScanExecMeta.scala b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/HybridFileSourceScanExecMeta.scala index 269cac554ab..4d86ca69f4c 100644 --- a/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/HybridFileSourceScanExecMeta.scala +++ b/sql-plugin/src/main/spark320/scala/com/nvidia/spark/rapids/shims/HybridFileSourceScanExecMeta.scala @@ -100,8 +100,6 @@ class HybridFileSourceScanExecMeta(plan: FileSourceScanExec, } object HybridFileSourceScanExecMeta { - private val HYBRID_JAR_PLUGIN_CLASS_NAME = "com.nvidia.spark.rapids.hybrid.HybridPluginWrapper" - // Determines whether using HybridScan or GpuScan def useHybridScan(conf: RapidsConf, fsse: FileSourceScanExec): Boolean = { val isEnabled = if (conf.useHybridParquetReader) { @@ -148,7 +146,7 @@ object HybridFileSourceScanExecMeta { */ def checkRuntimes(v1DataSourceList: String): Unit = { checkNotRunningCDHorDatabricks() - checkHybridJarInClassPath() + HybridExecutionUtils.checkHybridJarInClassPath() checkJavaVersion() checkScalaVersion() checkV1Datasource(v1DataSourceList) @@ -166,21 +164,6 @@ object HybridFileSourceScanExecMeta { } } - /** - * Check if the Hybrid jar is in the classpath, - * report error if not - */ - private def checkHybridJarInClassPath(): Unit = { - try { - Class.forName(HYBRID_JAR_PLUGIN_CLASS_NAME) - } catch { - case e: ClassNotFoundException => throw new RuntimeException( - "Hybrid jar is not in the classpath, Please add Hybrid jar into the class path, or " + - "Please disable Hybrid feature by setting " + - "spark.rapids.sql.parquet.useHybridReader=false", e) - } - } - /** * Hybrid feature only supports 1.8 Java version, * report error if not