Add date and timestamp support to to_json [databricks] #9600

Merged: 26 commits, Nov 29, 2023
1 change: 0 additions & 1 deletion docs/compatibility.md
@@ -346,7 +346,6 @@ with Spark, and can be enabled by setting `spark.rapids.sql.expression.StructsTo

Known issues are:

- There is no support for timestamp types
- There can be rounding differences when formatting floating-point numbers as strings. For example, Spark may
produce `-4.1243574E26` but the GPU may produce `-4.124357351E26`.
- Not all JSON options are respected
10 changes: 5 additions & 5 deletions docs/supported_ops.md
@@ -14541,16 +14541,16 @@ are limited.
<td>S</td>
<td>S</td>
<td>S</td>
<td> </td>
<td> </td>
<td>S</td>
<td><em>PS<br/>UTC is only supported TZ for TIMESTAMP</em></td>
<td>S</td>
<td> </td>
<td> </td>
<td> </td>
<td> </td>
<td>S</td>
<td>S</td>
<td>S</td>
<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP</em></td>
<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP</em></td>
<td><em>PS<br/>UTC is only supported TZ for child TIMESTAMP</em></td>
<td> </td>
</tr>
<tr>
10 changes: 6 additions & 4 deletions integration_tests/src/main/python/json_test.py
@@ -612,8 +612,8 @@ def test_read_case_col_name(spark_tmp_path, v1_enabled_list, col_name):
long_gen,
pytest.param(float_gen, marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/9350')),
pytest.param(double_gen, marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/9350')),
pytest.param(date_gen, marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/9515')),
pytest.param(timestamp_gen, marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/9515')),
date_gen,
timestamp_gen,
Collaborator comment:
Can we add some fallback tests for the options timestampFormat and dateFormat, and for when "spark.sql.legacy.timeParserPolicy" is set to LEGACY? Judging from the code, all of these affect the format used to write the JSON output.
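The option checks the comment refers to boil down to a small predicate: given the to_json options and the schema contents, decide whether the GPU plan must fall back to CPU. A hedged pure-Python model of that logic follows; the option names and default formats are taken from this PR's diff, but the helper itself is illustrative, not the plugin's actual Scala code.

```python
# Illustrative model of the CPU-fallback decision for to_json.
# Defaults mirror the constants in the PR's GpuOverrides changes.
DEFAULT_DATE_FORMAT = "yyyy-MM-dd"
DEFAULT_TIMESTAMP_FORMAT = "yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]"

def to_json_falls_back(options, has_dates=False, has_timestamps=False):
    """Return a reason string if to_json must fall back to CPU, else None."""
    # pretty-printed output is not supported on the GPU
    if options.get("pretty", "false").lower() == "true":
        return "pretty=true is not supported"
    # only the default dateFormat is supported for DATE columns
    if has_dates and options.get("dateFormat", DEFAULT_DATE_FORMAT) != DEFAULT_DATE_FORMAT:
        return "unsupported dateFormat"
    if has_timestamps:
        # only the default timestampFormat is supported
        if options.get("timestampFormat", DEFAULT_TIMESTAMP_FORMAT) != DEFAULT_TIMESTAMP_FORMAT:
            return "unsupported timestampFormat"
        # the GPU kernel hard-codes a trailing Z, so only UTC works
        if options.get("timeZone", "UTC") not in ("UTC", "Etc/UTC"):
            return "unsupported timeZone"
    return None
```

For example, `to_json_falls_back({"timeZone": "America/New_York"}, has_timestamps=True)` reports the unsupported timezone, while an empty options dict stays on the GPU.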

StringGen('[A-Za-z0-9\r\n\'"\\\\]{0,10}', nullable=True) \
.with_special_case('\u1f600') \
.with_special_case('"a"') \
@@ -628,7 +628,8 @@ def test_read_case_col_name(spark_tmp_path, v1_enabled_list, col_name):
pytest.param(True, marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/9517')),
False
])
def test_structs_to_json(spark_tmp_path, data_gen, ignore_null_fields, pretty):
@pytest.mark.parametrize('timezone', ['UTC', 'Etc/UTC'])
def test_structs_to_json(spark_tmp_path, data_gen, ignore_null_fields, pretty, timezone):
Collaborator comment:
Can we add a fallback test for non-UTC timezones?

struct_gen = StructGen([
('a', data_gen),
("b", StructGen([('child', data_gen)], nullable=True)),
@@ -640,7 +641,8 @@ def test_structs_to_json(spark_tmp_path, data_gen, ignore_null_fields, pretty):
gen = StructGen([('my_struct', struct_gen)], nullable=False)

options = { 'ignoreNullFields': ignore_null_fields,
'pretty': pretty }
'pretty': pretty,
'timeZone': timezone}

def struct_to_json(spark):
df = gen_df(spark, gen)
17 changes: 14 additions & 3 deletions sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuCast.scala
@@ -730,6 +730,7 @@ object GpuCast {
fromDataType: DataType, options: CastOptions): ColumnVector = fromDataType match {
case StringType => input.copyToColumnVector()
case DateType => input.asStrings("%Y-%m-%d")
case TimestampType if options.castToJsonString => castTimestampToJson(input)
case TimestampType => castTimestampToString(input)
case FloatType | DoubleType => castFloatingTypeToString(input)
case BinaryType => castBinToString(input, options)
@@ -773,6 +774,14 @@
}
}

private def castTimestampToJson(input: ColumnView): ColumnVector = {
// we fall back to CPU if the JSON timezone is not UTC, so it is safe
// to hard-code `Z` here for now, but we should really add a timestamp
// format to CastOptions when we add support for custom formats in
// https://github.com/NVIDIA/spark-rapids/issues/9602
input.asStrings("%Y-%m-%dT%H:%M:%S.%3fZ")
}
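The cudf format string above produces ISO-8601 strings with millisecond precision and a literal `Z`. A hedged Python sketch of the equivalent formatting (illustrative only; `%3f` is cudf's millisecond specifier, which plain `strftime` lacks):

```python
from datetime import datetime, timezone

def format_timestamp_for_json(ts: datetime) -> str:
    """Format a timestamp the way "%Y-%m-%dT%H:%M:%S.%3fZ" does:
    UTC, millisecond precision, literal trailing Z."""
    ts = ts.astimezone(timezone.utc)
    # Python's %f is microseconds; truncate to milliseconds like cudf's %3f.
    return ts.strftime("%Y-%m-%dT%H:%M:%S.") + f"{ts.microsecond // 1000:03d}Z"

print(format_timestamp_for_json(
    datetime(2023, 11, 29, 12, 34, 56, 789000, tzinfo=timezone.utc)))
# → 2023-11-29T12:34:56.789Z
```

The hard-coded `Z` is what forces the CPU fallback for non-UTC session timezones elsewhere in this PR.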

/**
* A 5 steps solution for concatenating string array column. <p>
* Giving an input with 3 rows:
@@ -932,7 +941,8 @@
// to be represented by the string literal `null`
val strValue = closeOnExcept(strKey) { _ =>
withResource(kvStructColumn.getChildColumnView(1)) { valueColumn =>
val valueStr = if (valueColumn.getType == DType.STRING) {
val dt = valueColumn.getType
val valueStr = if (dt == DType.STRING || dt.isDurationType || dt.isTimestampType) {
withResource(castToString(valueColumn, from.valueType, options)) { valueStr =>
addQuotes(valueStr, valueColumn.getRowCount.toInt)
}
@@ -1102,8 +1112,9 @@
colon: ColumnVector,
quote: ColumnVector): ColumnVector = {
val jsonName = StringEscapeUtils.escapeJson(inputSchema(fieldIndex).name)
val dataType = inputSchema(fieldIndex).dataType
val needsQuoting = dataType == DataTypes.StringType
val dt = inputSchema(fieldIndex).dataType
val needsQuoting = dt == DataTypes.StringType || dt == DataTypes.DateType ||
dt == DataTypes.TimestampType
withResource(input.getChildColumnView(fieldIndex)) { cv =>
withResource(ArrayBuffer.empty[ColumnVector]) { attrColumns =>
// prefix with quoted column name followed by colon
@@ -3598,15 +3598,47 @@ object GpuOverrides extends Logging {
TypeSig.STRING,
Seq(ParamCheck("struct",
(TypeSig.BOOLEAN + TypeSig.STRING + TypeSig.integral + TypeSig.FLOAT +
TypeSig.DOUBLE + TypeSig.STRUCT + TypeSig.ARRAY + TypeSig.MAP).nested(),
TypeSig.DOUBLE + TypeSig.DATE + TypeSig.TIMESTAMP +
TypeSig.STRUCT + TypeSig.ARRAY + TypeSig.MAP).nested(),
(TypeSig.BOOLEAN + TypeSig.STRING + TypeSig.integral + TypeSig.FLOAT +
TypeSig.DOUBLE + TypeSig.STRUCT + TypeSig.ARRAY + TypeSig.MAP).nested()
TypeSig.DOUBLE + TypeSig.DATE + TypeSig.TIMESTAMP +
TypeSig.STRUCT + TypeSig.ARRAY + TypeSig.MAP).nested()
))),
(a, conf, p, r) => new UnaryExprMeta[StructsToJson](a, conf, p, r) {
override def tagExprForGpu(): Unit = {
if (a.options.get("pretty").exists(_.equalsIgnoreCase("true"))) {
willNotWorkOnGpu("to_json option pretty=true is not supported")
}
val hasDates = TrampolineUtil.dataTypeExistsRecursively(a.child.dataType,
_.isInstanceOf[DateType])
if (hasDates) {
// check if the default format is being used
val defaultFormat = "yyyy-MM-dd"
val dateFormat = a.options.getOrElse("dateFormat", defaultFormat)
if (dateFormat != defaultFormat) {
// we can likely support other formats but we would need to add tests
// tracking issue is https://github.com/NVIDIA/spark-rapids/issues/9602
willNotWorkOnGpu(s"Unsupported dateFormat '$dateFormat' in to_json")
}
}
val hasTimestamps = TrampolineUtil.dataTypeExistsRecursively(a.child.dataType,
_.isInstanceOf[TimestampType])
if (hasTimestamps) {
// check if the default format is being used
val defaultFormat = "yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]"
val timestampFormat = a.options.getOrElse("timestampFormat", defaultFormat)
Collaborator comment:
I don't think this does 100% of what we want.

https://github.com/apache/spark/blob/7120e6b88f2327ffb71c4bca14b10b15aeb26c32/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala#L125-L130

Include a check for the legacyTimeParserPolicy. Either we need to use JSONOptions directly to get the format that we should use, or we need to replicate its behavior exactly. I would prefer JSONOptions if possible, even through a trampoline, because then we know exactly what the format is, even if it changes from one version of Spark to another.
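The Spark behavior the linked JSONOptions code implements is a policy-dependent default: when no explicit timestampFormat is given, the write format differs under the LEGACY time parser policy. A hedged Python sketch of that selection (default strings taken from the linked JSONOptions.scala; the function itself is illustrative):

```python
# Illustrative model of Spark's JSONOptions.timestampFormatInWrite:
# an explicit option wins; otherwise the default depends on
# spark.sql.legacy.timeParserPolicy.
def default_timestamp_format_in_write(options, legacy_time_parser_policy):
    if "timestampFormat" in options:
        return options["timestampFormat"]
    if legacy_time_parser_policy == "LEGACY":
        return "yyyy-MM-dd'T'HH:mm:ss.SSSXXX"
    return "yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]"
```

Comparing the user option against a single hard-coded default, as the diff does, misses the LEGACY branch, which is the gap the comment points out.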

if (timestampFormat != defaultFormat) {
// we can likely support other formats but we would need to add tests
// tracking issue is https://github.com/NVIDIA/spark-rapids/issues/9602
willNotWorkOnGpu(s"Unsupported timestampFormat '$timestampFormat' in to_json")
}
val timeZone = a.options.getOrElse("timeZone", SQLConf.get.sessionLocalTimeZone)
if (timeZone != "UTC" && timeZone != "Etc/UTC") {
// we hard-code the timezone `Z` in GpuCast.castTimestampToJson
// so we need to fall back if a different timeZone is specified
willNotWorkOnGpu(s"Unsupported timeZone '$timeZone' in to_json")
}
}
}

override def convertToGpu(child: Expression): GpuExpression =
2 changes: 1 addition & 1 deletion tools/generated_files/supportedExprs.csv
@@ -539,7 +539,7 @@
StringTrimLeft,S,`ltrim`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA
StringTrimRight,S,`rtrim`,None,project,src,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA
StringTrimRight,S,`rtrim`,None,project,trimStr,NA,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA
StringTrimRight,S,`rtrim`,None,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA
StructsToJson,NS,`to_json`,This is disabled by default because to_json support is experimental. See compatibility guide for more information.,project,struct,S,S,S,S,S,S,S,NA,NA,S,NA,NA,NA,NA,S,S,S,NA
StructsToJson,NS,`to_json`,This is disabled by default because to_json support is experimental. See compatibility guide for more information.,project,struct,S,S,S,S,S,S,S,S,PS,S,NA,NA,NA,NA,PS,PS,PS,NA
StructsToJson,NS,`to_json`,This is disabled by default because to_json support is experimental. See compatibility guide for more information.,project,result,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA
Substring,S,`substr`; `substring`,None,project,str,NA,NA,NA,NA,NA,NA,NA,NA,NA,S,NA,NA,NS,NA,NA,NA,NA,NA
Substring,S,`substr`; `substring`,None,project,pos,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA