forked from NVIDIA/spark-rapids
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'origin/branch-23.12' into implicitJDKProfiles2
- Loading branch information
Showing 30 changed files with 3,741 additions and 38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Copyright (c) 2023, NVIDIA CORPORATION.

  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<!-- Maven module: Delta Lake 2.3.x support shim for the RAPIDS Accelerator. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Inherits shared configuration from the repository root POM (two levels up). -->
  <parent>
    <groupId>com.nvidia</groupId>
    <artifactId>rapids-4-spark-parent_2.12</artifactId>
    <version>23.12.0-SNAPSHOT</version>
    <relativePath>../../pom.xml</relativePath>
  </parent>

  <artifactId>rapids-4-spark-delta-23x_2.12</artifactId>
  <name>RAPIDS Accelerator for Apache Spark Delta Lake 2.3.x Support</name>
  <description>Delta Lake 2.3.x support for the RAPIDS Accelerator for Apache Spark</description>
  <version>23.12.0-SNAPSHOT</version>

  <properties>
    <!-- Properties consumed by the parent build; rapids.module points back at
         this module's directory relative to the aggregator. -->
    <rapids.module>../delta-lake/delta-23x</rapids.module>
    <rapids.compressed.artifact>false</rapids.compressed.artifact>
    <!-- Exclude everything from the default jar; the shim jar built in the
         package phase (rapids.shim.jar.phase below) carries the classes. -->
    <rapids.default.jar.excludePattern>**/*</rapids.default.jar.excludePattern>
    <rapids.shim.jar.phase>package</rapids.shim.jar.phase>
  </properties>

  <dependencies>
    <!-- Compile-time-only dependencies: the sql plugin jar, Delta Lake 2.3.0,
         and Spark SQL are all provided by the runtime environment. -->
    <dependency>
      <groupId>com.nvidia</groupId>
      <artifactId>rapids-4-spark-sql_${scala.binary.version}</artifactId>
      <version>${project.version}</version>
      <classifier>${spark.version.classifier}</classifier>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>io.delta</groupId>
      <artifactId>delta-core_${scala.binary.version}</artifactId>
      <version>2.3.0</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-sql_${scala.binary.version}</artifactId>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>build-helper-maven-plugin</artifactId>
        <executions>
          <execution>
            <id>add-common-sources</id>
            <phase>generate-sources</phase>
            <goals>
              <goal>add-source</goal>
            </goals>
            <configuration>
              <sources>
                <!-- The #if/#endif markers are preprocessor directives used by
                     the build to toggle source roots per Scala binary version;
                     the scala-2.13 variant resolves the paths via rapids.module. -->
                <!-- #if scala-2.12 -->
                <source>${project.basedir}/../common/src/main/scala</source>
                <source>${project.basedir}/../common/src/main/delta-io/scala</source>
                <!-- #endif scala-2.12 -->
                <!-- #if scala-2.13 --><!--
                <source>${project.basedir}/../../${rapids.module}/../common/src/main/scala</source>
                <source>${project.basedir}/../../${rapids.module}/../common/src/main/delta-io/scala</source>
                --><!-- #endif scala-2.13 -->
              </sources>
            </configuration>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>net.alchim31.maven</groupId>
        <artifactId>scala-maven-plugin</artifactId>
      </plugin>
      <plugin>
        <groupId>org.apache.rat</groupId>
        <artifactId>apache-rat-plugin</artifactId>
      </plugin>
    </plugins>
  </build>
</project>
58 changes: 58 additions & 0 deletions — delta-lake/delta-23x/src/main/scala/com/nvidia/spark/rapids/delta/delta23x/DeleteCommandMeta.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
/* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com.nvidia.spark.rapids.delta.delta23x | ||
|
||
import com.nvidia.spark.rapids.{DataFromReplacementRule, RapidsConf, RapidsMeta, RunnableCommandMeta} | ||
import com.nvidia.spark.rapids.delta.RapidsDeltaUtils | ||
|
||
import org.apache.spark.sql.SparkSession | ||
import org.apache.spark.sql.delta.commands.{DeleteCommand, DeletionVectorUtils} | ||
import org.apache.spark.sql.delta.rapids.GpuDeltaLog | ||
import org.apache.spark.sql.delta.rapids.delta23x.GpuDeleteCommand | ||
import org.apache.spark.sql.delta.sources.DeltaSQLConf | ||
import org.apache.spark.sql.execution.command.RunnableCommand | ||
|
||
/**
 * Plan-rewrite metadata for a Delta Lake [[DeleteCommand]]: decides whether the
 * delete can run on the GPU and, if so, builds the GPU replacement command.
 */
class DeleteCommandMeta(
    deleteCmd: DeleteCommand,
    conf: RapidsConf,
    parent: Option[RapidsMeta[_, _, _]],
    rule: DataFromReplacementRule)
  extends RunnableCommandMeta[DeleteCommand](deleteCmd, conf, parent, rule) {

  override def tagSelfForGpu(): Unit = {
    // Delta writes must be explicitly enabled before any delete is accelerated.
    if (!conf.isDeltaWriteEnabled) {
      willNotWorkOnGpu("Delta Lake output acceleration has been disabled. To enable set " +
        s"${RapidsConf.ENABLE_DELTA_WRITE} to true")
    }
    // Only consult the persistent-DV setting when the snapshot can actually
    // write deletion vectors (&& short-circuits the getConf call otherwise).
    val snapshotAllowsDv = DeletionVectorUtils.deletionVectorsWritable(
      deleteCmd.deltaLog.unsafeVolatileSnapshot)
    val usesPersistentDv = snapshotAllowsDv && deleteCmd.conf.getConf(
      DeltaSQLConf.DELETE_USE_PERSISTENT_DELETION_VECTORS)
    if (usesPersistentDv) {
      // https://github.com/NVIDIA/spark-rapids/issues/8554
      willNotWorkOnGpu("Deletion vectors are not supported on GPU")
    }
    // Validate the target table's schema for GPU write support.
    RapidsDeltaUtils.tagForDeltaWrite(this, deleteCmd.target.schema, Some(deleteCmd.deltaLog),
      Map.empty, SparkSession.active)
  }

  override def convertToGpu(): RunnableCommand = {
    val gpuLog = new GpuDeltaLog(deleteCmd.deltaLog, conf)
    GpuDeleteCommand(gpuLog, deleteCmd.target, deleteCmd.condition)
  }
}
110 changes: 110 additions & 0 deletions — delta-lake/delta-23x/src/main/scala/com/nvidia/spark/rapids/delta/delta23x/Delta23xProvider.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
/* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com.nvidia.spark.rapids.delta.delta23x | ||
|
||
import com.nvidia.spark.rapids.{AtomicCreateTableAsSelectExecMeta, AtomicReplaceTableAsSelectExecMeta, GpuExec, GpuOverrides, GpuReadParquetFileFormat, RunnableCommandRule, SparkPlanMeta} | ||
import com.nvidia.spark.rapids.delta.DeltaIOProvider | ||
|
||
import org.apache.spark.sql.delta.DeltaParquetFileFormat | ||
import org.apache.spark.sql.delta.DeltaParquetFileFormat.{IS_ROW_DELETED_COLUMN_NAME, ROW_INDEX_COLUMN_NAME} | ||
import org.apache.spark.sql.delta.catalog.DeltaCatalog | ||
import org.apache.spark.sql.delta.commands.{DeleteCommand, MergeIntoCommand, UpdateCommand} | ||
import org.apache.spark.sql.delta.rapids.DeltaRuntimeShim | ||
import org.apache.spark.sql.execution.FileSourceScanExec | ||
import org.apache.spark.sql.execution.command.RunnableCommand | ||
import org.apache.spark.sql.execution.datasources.FileFormat | ||
import org.apache.spark.sql.execution.datasources.v2.{AtomicCreateTableAsSelectExec, AtomicReplaceTableAsSelectExec} | ||
import org.apache.spark.sql.execution.datasources.v2.rapids.{GpuAtomicCreateTableAsSelectExec, GpuAtomicReplaceTableAsSelectExec} | ||
|
||
/**
 * Delta Lake 2.3.x entry point for the RAPIDS Accelerator: registers GPU
 * replacement rules for Delta runnable commands, vets Delta file scans for
 * GPU support, and converts CTAS/RTAS execs to their GPU counterparts.
 */
object Delta23xProvider extends DeltaIOProvider {

  override def getRunnableCommandRules: Map[Class[_ <: RunnableCommand],
      RunnableCommandRule[_ <: RunnableCommand]] = {
    // All three commands are experimental and therefore off by default.
    val rules = Seq(
      GpuOverrides.runnableCmd[DeleteCommand](
        "Delete rows from a Delta Lake table",
        (a, conf, p, r) => new DeleteCommandMeta(a, conf, p, r))
        .disabledByDefault("Delta Lake delete support is experimental"),
      GpuOverrides.runnableCmd[MergeIntoCommand](
        "Merge of a source query/table into a Delta table",
        (a, conf, p, r) => new MergeIntoCommandMeta(a, conf, p, r))
        .disabledByDefault("Delta Lake merge support is experimental"),
      GpuOverrides.runnableCmd[UpdateCommand](
        "Update rows in a Delta Lake table",
        (a, conf, p, r) => new UpdateCommandMeta(a, conf, p, r))
        .disabledByDefault("Delta Lake update support is experimental"))
    rules.map(rule => rule.getClassFor.asSubclass(classOf[RunnableCommand]) -> rule).toMap
  }

  override def tagSupportForGpuFileSourceScan(meta: SparkPlanMeta[FileSourceScanExec]): Unit = {
    val fileFormat = meta.wrapped.relation.fileFormat
    // Exact class match on purpose: subclasses of DeltaParquetFileFormat may
    // carry behavior this shim does not handle.
    if (fileFormat.getClass != classOf[DeltaParquetFileFormat]) {
      meta.willNotWorkOnGpu(s"format ${fileFormat.getClass} is not supported")
    } else {
      val deltaFormat = fileFormat.asInstanceOf[DeltaParquetFileFormat]
      val requiredSchema = meta.wrapped.requiredSchema
      // The deletion-vector metadata columns cannot be produced on the GPU.
      Seq(IS_ROW_DELETED_COLUMN_NAME, ROW_INDEX_COLUMN_NAME).foreach { metadataCol =>
        if (requiredSchema.exists(_.name == metadataCol)) {
          meta.willNotWorkOnGpu(
            s"reading metadata column $metadataCol is not supported")
        }
      }
      if (deltaFormat.hasDeletionVectorMap()) {
        meta.willNotWorkOnGpu("deletion vectors are not supported")
      }
      // Delegate the remaining Parquet-level checks to the common reader.
      GpuReadParquetFileFormat.tagSupport(meta)
    }
  }

  override def getReadFileFormat(format: FileFormat): FileFormat = {
    val cpuFormat = format.asInstanceOf[DeltaParquetFileFormat]
    GpuDelta23xParquetFileFormat(cpuFormat.metadata, cpuFormat.isSplittable)
  }

  override def convertToGpu(
      cpuExec: AtomicCreateTableAsSelectExec,
      meta: AtomicCreateTableAsSelectExecMeta): GpuExec = {
    val gpuCatalog = DeltaRuntimeShim.getGpuDeltaCatalog(
      cpuExec.catalog.asInstanceOf[DeltaCatalog], meta.conf)
    GpuAtomicCreateTableAsSelectExec(
      gpuCatalog,
      cpuExec.ident,
      cpuExec.partitioning,
      cpuExec.plan,
      meta.childPlans.head.convertIfNeeded(),
      cpuExec.tableSpec,
      cpuExec.writeOptions,
      cpuExec.ifNotExists)
  }

  override def convertToGpu(
      cpuExec: AtomicReplaceTableAsSelectExec,
      meta: AtomicReplaceTableAsSelectExecMeta): GpuExec = {
    val gpuCatalog = DeltaRuntimeShim.getGpuDeltaCatalog(
      cpuExec.catalog.asInstanceOf[DeltaCatalog], meta.conf)
    GpuAtomicReplaceTableAsSelectExec(
      gpuCatalog,
      cpuExec.ident,
      cpuExec.partitioning,
      cpuExec.plan,
      meta.childPlans.head.convertIfNeeded(),
      cpuExec.tableSpec,
      cpuExec.writeOptions,
      cpuExec.orCreate,
      cpuExec.invalidateCache)
  }
}
Oops, something went wrong.