Skip to content

Commit

Permalink
shim work
Browse files Browse the repository at this point in the history
  • Loading branch information
andygrove committed Oct 5, 2023
1 parent 34b737d commit 5140e5d
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 11 deletions.
16 changes: 6 additions & 10 deletions sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuCSVScan.scala
Original file line number Diff line number Diff line change
Expand Up @@ -195,17 +195,13 @@ object GpuCSVScan {
s"To enable it please set ${RapidsConf.ENABLE_READ_CSV_DECIMALS} to true.")
}

// TODO move this check to 350 shim
if (parsedOptions.inferSchemaFlag) {
parsedOptions.timestampFormatInRead match {
case Some(fmt) => fmt match {
case "yyyy-MM-dd" | "yyyy-MM" | "yyyy-MM-dd'T'HH:mm" | "yyyy-MM-dd'T'HH:mm:ss" =>
// https://github.com/NVIDIA/spark-rapids/issues/9325
meta.willNotWorkOnGpu(s"timestampFormat '$fmt' is not compatible with " +
s"Spark >= 3.5.0 when schema inference is enabled")
case _ =>
}
case _ =>
val timestampFormat = GpuCsvUtils.timestampFormatInRead(parsedOptions)
val unsupported = GpuCsvUtils.unsupportedTimestampFormatsForSchemaInference(parsedOptions)
if (unsupported.contains(timestampFormat)) {
// https://github.com/NVIDIA/spark-rapids/issues/9325
meta.willNotWorkOnGpu(s"timestampFormat '$timestampFormat' is not supported when " +
s"schema inference is enabled")
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,6 @@ object GpuCsvUtils {
/** Returns the date pattern used when reading CSV, taken directly from `options.dateFormat`. */
def dateFormatInRead(options: CSVOptions): String = options.dateFormat
/**
 * Returns the timestamp pattern used when reading CSV, taken directly from
 * `options.timestampFormat`.
 */
def timestampFormatInRead(options: CSVOptions): String = options.timestampFormat
/** Always returns `false` in this variant of `GpuCsvUtils`; `options` is not consulted. */
def enableDateTimeParsingFallback(options: CSVOptions): Boolean = false

/**
 * Timestamp formats that are not supported when schema inference is enabled.
 * This variant reports none: the result is always the empty set and `options` is not consulted.
 */
def unsupportedTimestampFormatsForSchemaInference(options: CSVOptions): Set[String] = Set.empty
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,6 @@ object GpuCsvUtils {
})

/** Always returns `false` in this variant of `GpuCsvUtils`; `options` is not consulted. */
def enableDateTimeParsingFallback(options: CSVOptions): Boolean = false

/**
 * Timestamp formats that are not supported when schema inference is enabled.
 * This variant reports none: the result is always the empty set and `options` is not consulted.
 */
def unsupportedTimestampFormatsForSchemaInference(options: CSVOptions): Set[String] = Set.empty
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,6 @@ object GpuCsvUtils {
})

/** Always returns `false` in this variant of `GpuCsvUtils`; `options` is not consulted. */
def enableDateTimeParsingFallback(options: CSVOptions): Boolean = false

/**
 * Timestamp formats that are not supported when schema inference is enabled.
 * This variant reports none: the result is always the empty set and `options` is not consulted.
 */
def unsupportedTimestampFormatsForSchemaInference(options: CSVOptions): Set[String] = Set.empty
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
/*** spark-rapids-shim-json-lines
{"spark": "340"}
{"spark": "341"}
{"spark": "350"}
spark-rapids-shim-json-lines ***/
package org.apache.spark.sql.catalyst.csv

Expand All @@ -38,4 +37,6 @@ object GpuCsvUtils {

/**
 * Returns the value of `options.enableDateTimeParsingFallback` when it is set,
 * and `false` when it is not.
 */
def enableDateTimeParsingFallback(options: CSVOptions): Boolean =
options.enableDateTimeParsingFallback.getOrElse(false)

/**
 * Timestamp formats that are not supported when schema inference is enabled.
 * This variant reports none: the result is always the empty set and `options` is not consulted.
 */
def unsupportedTimestampFormatsForSchemaInference(options: CSVOptions): Set[String] = Set.empty
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*** spark-rapids-shim-json-lines
{"spark": "350"}
spark-rapids-shim-json-lines ***/
package org.apache.spark.sql.catalyst.csv

import com.nvidia.spark.rapids.shims.LegacyBehaviorPolicyShim

import org.apache.spark.sql.catalyst.util.DateFormatter

/**
 * Helpers that resolve the effective CSV read settings from a [[CSVOptions]] instance.
 *
 * Each accessor returns the explicitly configured value when one is present and
 * otherwise falls back to a default computed here.
 */
object GpuCsvUtils {

  /** Timestamp patterns reported as unsupported when schema inference is enabled. */
  private val inferenceUnsupportedTimestampFormats: Set[String] = Set(
    "yyyy-MM-dd",
    "yyyy-MM",
    "yyyy-MM-dd'T'HH:mm",
    "yyyy-MM-dd'T'HH:mm:ss")

  /**
   * Builds the fallback timestamp pattern: the default date pattern followed by a
   * time suffix whose shape depends on whether the legacy time parser policy is active.
   */
  private def fallbackTimestampFormat: String = {
    // Pick the suffix first so the policy lookup still happens before the pattern is assembled.
    val timeSuffix =
      if (LegacyBehaviorPolicyShim.isLegacyTimeParserPolicy()) {
        "'T'HH:mm:ss.SSSXXX"
      } else {
        "'T'HH:mm:ss[.SSS][XXX]"
      }
    s"${DateFormatter.defaultPattern}$timeSuffix"
  }

  /**
   * Date pattern to use when reading.
   *
   * @param options the CSV options to inspect
   * @return `options.dateFormatInRead` if defined, else `DateFormatter.defaultPattern`
   */
  def dateFormatInRead(options: CSVOptions): String = {
    val configured = options.dateFormatInRead
    configured.getOrElse(DateFormatter.defaultPattern)
  }

  /**
   * Timestamp pattern to use when reading.
   *
   * @param options the CSV options to inspect
   * @return `options.timestampFormatInRead` if defined, else a default pattern that
   *         depends on the legacy time parser policy
   */
  def timestampFormatInRead(options: CSVOptions): String = {
    // getOrElse takes its argument by name, so the fallback is only computed when needed.
    options.timestampFormatInRead.getOrElse(fallbackTimestampFormat)
  }

  /**
   * Whether date/time parsing fallback is enabled.
   *
   * @param options the CSV options to inspect
   * @return `true` only when the option is explicitly set to `true`
   */
  def enableDateTimeParsingFallback(options: CSVOptions): Boolean = {
    options.enableDateTimeParsingFallback.contains(true)
  }

  /**
   * Timestamp formats that are not supported when schema inference is enabled.
   *
   * @param options the CSV options (not consulted by this implementation)
   * @return a fixed set of four timestamp patterns
   */
  def unsupportedTimestampFormatsForSchemaInference(options: CSVOptions): Set[String] = {
    inferenceUnsupportedTimestampFormats
  }
}

0 comments on commit 5140e5d

Please sign in to comment.