-
Notifications
You must be signed in to change notification settings - Fork 241
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Partial Delta Lake Support for Databricks 13.3 (#9644)
* delta lake changes * Signing off Signed-off-by: Raza Jafri <[email protected]> * fixed 341db delta parent * delta lake changes for AtomicCreateTableAsSelectExec * added commit tags * addressed review comments * added scala 2.13 pom * removed unnecessary change * fixed merge conflicts * fixed line length * updated udf-compiler pom.xml * updated sql-plugin pom.xml * fixed multiple pom.xml * updated udf-compiler pom.xml * Singing off Signed-off-by: Raza Jafri <[email protected]> * Revert "updated udf-compiler pom.xml" This reverts commit e2fd85c. * Revert "fixed multiple pom.xml" This reverts commit 7a87438. * Revert "updated sql-plugin pom.xml" This reverts commit 6c1259d. * Revert "updated udf-compiler pom.xml" This reverts commit 7f34d35. * Fixed params to GpuAtomicCreateTableAsSelectExec * Fixed GpuAtomicReplaceTableAsSelectExec params * addressed review comments * xfail test_delta_atomic_create_table_as_select and test_delta_atomic_replace_table_as_select * allow WriteFilesExec on CPU --------- Signed-off-by: Raza Jafri <[email protected]> Signed-off-by: Raza Jafri <[email protected]> Co-authored-by: raza <[email protected]>
- Loading branch information
Showing
37 changed files
with
4,744 additions
and
57 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
25 changes: 25 additions & 0 deletions
25
delta-lake/delta-spark321db/src/main/scala/com/nvidia/spark/rapids/delta/DeltaProbe.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
/* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package com.nvidia.spark.rapids.delta | ||
|
||
/** | ||
* Implements the Delta Probe interface for probing the Delta Lake provider on Databricks. | ||
* | ||
* No runtime detection is performed here: [[getDeltaProvider]] unconditionally returns the | ||
* [[DeltaSpark321DBProvider]] object. | ||
* | ||
* @note This is instantiated via reflection from ShimLoader. | ||
*/ | ||
class DeltaProbeImpl extends DeltaProbe { | ||
// Delta Lake is built-in for Databricks instances, so no probing is necessary. | ||
// The provider is therefore a fixed reference rather than the result of a lookup. | ||
override def getDeltaProvider: DeltaProvider = DeltaSpark321DBProvider | ||
} |
57 changes: 57 additions & 0 deletions
57
...lta-spark321db/src/main/scala/com/nvidia/spark/rapids/delta/DeltaSpark321DBProvider.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
/* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com.nvidia.spark.rapids.delta | ||
|
||
import com.databricks.sql.transaction.tahoe.rapids.GpuDeltaCatalog | ||
import com.nvidia.spark.rapids.{AtomicCreateTableAsSelectExecMeta, AtomicReplaceTableAsSelectExecMeta, GpuExec} | ||
|
||
import org.apache.spark.sql.execution.datasources.v2.{AtomicCreateTableAsSelectExec, AtomicReplaceTableAsSelectExec} | ||
import org.apache.spark.sql.execution.datasources.v2.rapids.{GpuAtomicCreateTableAsSelectExec, GpuAtomicReplaceTableAsSelectExec} | ||
|
||
// Delta Lake provider object built on DatabricksDeltaProviderBase. It overrides the two | ||
// inherited convertToGpu overloads so that the CPU atomic create-table-as-select and | ||
// replace-table-as-select execs are rebuilt as their GPU counterparts. | ||
object DeltaSpark321DBProvider extends DatabricksDeltaProviderBase { | ||
|
||
// Builds the GPU version of an atomic CREATE TABLE AS SELECT exec. | ||
// cpuExec: the CPU exec whose fields are copied into the GPU exec. | ||
// meta: supplies the configuration (meta.conf) and the child plan metadata. | ||
// Returns the GpuAtomicCreateTableAsSelectExec as a GpuExec. | ||
override def convertToGpu( | ||
cpuExec: AtomicCreateTableAsSelectExec, | ||
meta: AtomicCreateTableAsSelectExecMeta): GpuExec = { | ||
GpuAtomicCreateTableAsSelectExec( | ||
cpuExec.output, | ||
// The CPU exec's catalog is not passed through directly; it is wrapped, together | ||
// with meta.conf, in a GpuDeltaCatalog. | ||
new GpuDeltaCatalog(cpuExec.catalog, meta.conf), | ||
cpuExec.ident, | ||
cpuExec.partitioning, | ||
cpuExec.plan, | ||
// Uses only the first child plan; assumes childPlans is non-empty -- TODO confirm. | ||
// Presumably convertIfNeeded() yields the GPU plan when the child can be replaced. | ||
meta.childPlans.head.convertIfNeeded(), | ||
cpuExec.tableSpec, | ||
cpuExec.writeOptions, | ||
cpuExec.ifNotExists) | ||
} | ||
|
||
// Builds the GPU version of an atomic REPLACE TABLE AS SELECT exec. | ||
// Same argument mapping as the create variant above, except that the trailing flags are | ||
// cpuExec.orCreate and cpuExec.invalidateCache instead of cpuExec.ifNotExists. | ||
// Returns the GpuAtomicReplaceTableAsSelectExec as a GpuExec. | ||
override def convertToGpu( | ||
cpuExec: AtomicReplaceTableAsSelectExec, | ||
meta: AtomicReplaceTableAsSelectExecMeta): GpuExec = { | ||
GpuAtomicReplaceTableAsSelectExec( | ||
cpuExec.output, | ||
// Wrap the CPU exec's catalog, together with meta.conf, in a GpuDeltaCatalog. | ||
new GpuDeltaCatalog(cpuExec.catalog, meta.conf), | ||
cpuExec.ident, | ||
cpuExec.partitioning, | ||
cpuExec.plan, | ||
// Uses only the first child plan; assumes childPlans is non-empty -- TODO confirm. | ||
meta.childPlans.head.convertIfNeeded(), | ||
cpuExec.tableSpec, | ||
cpuExec.writeOptions, | ||
cpuExec.orCreate, | ||
cpuExec.invalidateCache) | ||
} | ||
} |
20 changes: 20 additions & 0 deletions
20
...e/delta-spark321db/src/main/scala/com/nvidia/spark/rapids/delta/shims/MetadataShims.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
/* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package com.nvidia.spark.rapids.delta.shims | ||
|
||
import com.databricks.sql.transaction.tahoe.stats.UsesMetadataFields | ||
|
||
// Shim trait with no members of its own: it only extends the UsesMetadataFields type | ||
// imported from com.databricks.sql.transaction.tahoe.stats. | ||
// NOTE(review): presumably this gives shared code one stable name to mix in regardless of | ||
// where the underlying type lives in a given runtime -- confirm against the other shims. | ||
trait ShimUsesMetadataFields extends UsesMetadataFields |
25 changes: 25 additions & 0 deletions
25
delta-lake/delta-spark330db/src/main/scala/com/nvidia/spark/rapids/delta/DeltaProbe.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
/* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package com.nvidia.spark.rapids.delta | ||
|
||
/** | ||
* Implements the Delta Probe interface for probing the Delta Lake provider on Databricks. | ||
* | ||
* No runtime detection is performed here: [[getDeltaProvider]] unconditionally returns the | ||
* [[DeltaSpark330DBProvider]] object. | ||
* | ||
* @note This is instantiated via reflection from ShimLoader. | ||
*/ | ||
class DeltaProbeImpl extends DeltaProbe { | ||
// Delta Lake is built-in for Databricks instances, so no probing is necessary. | ||
// The provider is therefore a fixed reference rather than the result of a lookup. | ||
override def getDeltaProvider: DeltaProvider = DeltaSpark330DBProvider | ||
} |
57 changes: 57 additions & 0 deletions
57
...lta-spark330db/src/main/scala/com/nvidia/spark/rapids/delta/DeltaSpark330DBProvider.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
/* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com.nvidia.spark.rapids.delta | ||
|
||
import com.databricks.sql.transaction.tahoe.rapids.GpuDeltaCatalog | ||
import com.nvidia.spark.rapids.{AtomicCreateTableAsSelectExecMeta, AtomicReplaceTableAsSelectExecMeta, GpuExec} | ||
|
||
import org.apache.spark.sql.execution.datasources.v2.{AtomicCreateTableAsSelectExec, AtomicReplaceTableAsSelectExec} | ||
import org.apache.spark.sql.execution.datasources.v2.rapids.{GpuAtomicCreateTableAsSelectExec, GpuAtomicReplaceTableAsSelectExec} | ||
|
||
// Delta Lake provider object built on DatabricksDeltaProviderBase. It overrides the two | ||
// inherited convertToGpu overloads so that the CPU atomic create-table-as-select and | ||
// replace-table-as-select execs are rebuilt as their GPU counterparts. | ||
object DeltaSpark330DBProvider extends DatabricksDeltaProviderBase { | ||
|
||
// Builds the GPU version of an atomic CREATE TABLE AS SELECT exec. | ||
// cpuExec: the CPU exec whose fields are copied into the GPU exec. | ||
// meta: supplies the configuration (meta.conf) and the child plan metadata. | ||
// Returns the GpuAtomicCreateTableAsSelectExec as a GpuExec. | ||
override def convertToGpu( | ||
cpuExec: AtomicCreateTableAsSelectExec, | ||
meta: AtomicCreateTableAsSelectExecMeta): GpuExec = { | ||
GpuAtomicCreateTableAsSelectExec( | ||
cpuExec.output, | ||
// The CPU exec's catalog is not passed through directly; it is wrapped, together | ||
// with meta.conf, in a GpuDeltaCatalog. | ||
new GpuDeltaCatalog(cpuExec.catalog, meta.conf), | ||
cpuExec.ident, | ||
cpuExec.partitioning, | ||
cpuExec.plan, | ||
// Uses only the first child plan; assumes childPlans is non-empty -- TODO confirm. | ||
// Presumably convertIfNeeded() yields the GPU plan when the child can be replaced. | ||
meta.childPlans.head.convertIfNeeded(), | ||
cpuExec.tableSpec, | ||
cpuExec.writeOptions, | ||
cpuExec.ifNotExists) | ||
} | ||
|
||
// Builds the GPU version of an atomic REPLACE TABLE AS SELECT exec. | ||
// Same argument mapping as the create variant above, except that the trailing flags are | ||
// cpuExec.orCreate and cpuExec.invalidateCache instead of cpuExec.ifNotExists. | ||
// Returns the GpuAtomicReplaceTableAsSelectExec as a GpuExec. | ||
override def convertToGpu( | ||
cpuExec: AtomicReplaceTableAsSelectExec, | ||
meta: AtomicReplaceTableAsSelectExecMeta): GpuExec = { | ||
GpuAtomicReplaceTableAsSelectExec( | ||
cpuExec.output, | ||
// Wrap the CPU exec's catalog, together with meta.conf, in a GpuDeltaCatalog. | ||
new GpuDeltaCatalog(cpuExec.catalog, meta.conf), | ||
cpuExec.ident, | ||
cpuExec.partitioning, | ||
cpuExec.plan, | ||
// Uses only the first child plan; assumes childPlans is non-empty -- TODO confirm. | ||
meta.childPlans.head.convertIfNeeded(), | ||
cpuExec.tableSpec, | ||
cpuExec.writeOptions, | ||
cpuExec.orCreate, | ||
cpuExec.invalidateCache) | ||
} | ||
} |
20 changes: 20 additions & 0 deletions
20
...e/delta-spark330db/src/main/scala/com/nvidia/spark/rapids/delta/shims/MetadataShims.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
/* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package com.nvidia.spark.rapids.delta.shims | ||
|
||
import com.databricks.sql.transaction.tahoe.stats.UsesMetadataFields | ||
|
||
// Shim trait with no members of its own: it only extends the UsesMetadataFields type | ||
// imported from com.databricks.sql.transaction.tahoe.stats. | ||
// NOTE(review): presumably this gives shared code one stable name to mix in regardless of | ||
// where the underlying type lives in a given runtime -- confirm against the other shims. | ||
trait ShimUsesMetadataFields extends UsesMetadataFields |
25 changes: 25 additions & 0 deletions
25
delta-lake/delta-spark332db/src/main/scala/com/nvidia/spark/rapids/delta/DeltaProbe.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
/* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package com.nvidia.spark.rapids.delta | ||
|
||
/** | ||
* Implements the Delta Probe interface for probing the Delta Lake provider on Databricks. | ||
* | ||
* No runtime detection is performed here: [[getDeltaProvider]] unconditionally returns | ||
* `DeltaSpark332DBProvider`, which is defined outside this file. | ||
* | ||
* @note This is instantiated via reflection from ShimLoader. | ||
*/ | ||
class DeltaProbeImpl extends DeltaProbe { | ||
// Delta Lake is built-in for Databricks instances, so no probing is necessary. | ||
// The provider is therefore a fixed reference rather than the result of a lookup. | ||
override def getDeltaProvider: DeltaProvider = DeltaSpark332DBProvider | ||
} |
Oops, something went wrong.