Support to_utc_timestamp [databricks] #10144
Changes from 3 commits
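For context, a minimal usage sketch (not part of the diff) of the Spark API this PR adds GPU support for: to_utc_timestamp interprets a timestamp as local time in the given zone and returns the corresponding UTC instant. The DataFrame and values below are illustrative only.

from pyspark.sql import SparkSession
from pyspark.sql import functions as f

spark = SparkSession.builder.getOrCreate()
# One timestamp column; cast the string to a real timestamp type.
df = spark.createDataFrame([("2024-01-01 12:00:00",)], ["ts"]) \
    .withColumn("ts", f.col("ts").cast("timestamp"))
# 12:00 interpreted as Asia/Shanghai local time (UTC+8) corresponds to 04:00 UTC.
df.select(f.to_utc_timestamp(f.col("ts"), "Asia/Shanghai").alias("utc_ts")).show()

With the RAPIDS plugin enabled and a supported literal timezone, this expression is the one the new GpuToUTCTimestamp code path below is meant to replace.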
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -286,41 +286,52 @@ def test_unsupported_fallback_to_unix_timestamp(data_gen):
        spark, [("a", data_gen), ("b", string_gen)], length=10).selectExpr(
            "to_unix_timestamp(a, b)"),
        "ToUnixTimestamp")

supported_timezones = ["Asia/Shanghai", "UTC", "UTC+0", "UTC-0", "GMT", "GMT+0", "GMT-0", "EST", "MST", "VST"]
unsupported_timezones = ["PST", "NST", "AST", "America/Los_Angeles", "America/New_York", "America/Chicago"]

@pytest.mark.parametrize('time_zone', ["Asia/Shanghai", "UTC", "UTC+0", "UTC-0", "GMT", "GMT+0", "GMT-0"], ids=idfn)
@pytest.mark.parametrize('data_gen', [timestamp_gen], ids=idfn)
@tz_sensitive_test
@pytest.mark.parametrize('time_zone', supported_timezones, ids=idfn)
@allow_non_gpu(*non_utc_allow)
def test_from_utc_timestamp(data_gen, time_zone):
def test_from_utc_timestamp(time_zone):
    assert_gpu_and_cpu_are_equal_collect(
        lambda spark: unary_op_df(spark, data_gen).select(f.from_utc_timestamp(f.col('a'), time_zone)))
        lambda spark: unary_op_df(spark, timestamp_gen).select(f.from_utc_timestamp(f.col('a'), time_zone)))

@allow_non_gpu('ProjectExec')
@pytest.mark.parametrize('time_zone', ["PST", "NST", "AST", "America/Los_Angeles", "America/New_York", "America/Chicago"], ids=idfn)
@pytest.mark.parametrize('data_gen', [timestamp_gen], ids=idfn)
@tz_sensitive_test
def test_from_utc_timestamp_unsupported_timezone_fallback(data_gen, time_zone):
@pytest.mark.parametrize('time_zone', unsupported_timezones, ids=idfn)
def test_from_utc_timestamp_unsupported_timezone_fallback(time_zone):
    assert_gpu_fallback_collect(
        lambda spark: unary_op_df(spark, data_gen).select(f.from_utc_timestamp(f.col('a'), time_zone)),
        lambda spark: unary_op_df(spark, timestamp_gen).select(f.from_utc_timestamp(f.col('a'), time_zone)),
        'FromUTCTimestamp')

@pytest.mark.parametrize('time_zone', ["UTC", "Asia/Shanghai", "EST", "MST", "VST"], ids=idfn)
@pytest.mark.parametrize('data_gen', [timestamp_gen], ids=idfn)
@tz_sensitive_test
@allow_non_gpu('ProjectExec')
def test_unsupported_fallback_from_utc_timestamp():
    time_zone_gen = StringGen(pattern="UTC")
    assert_gpu_fallback_collect(
        lambda spark: gen_df(spark, [("a", timestamp_gen), ("tzone", time_zone_gen)]).selectExpr(
            "from_utc_timestamp(a, tzone)"),
        'FromUTCTimestamp')

@allow_non_gpu(*non_utc_allow)
def test_from_utc_timestamp_supported_timezones(data_gen, time_zone):
Review comment: This case looks duplicated with
@pytest.mark.parametrize('time_zone', supported_timezones, ids=idfn)
def test_to_utc_timestamp(time_zone):
    assert_gpu_and_cpu_are_equal_collect(
        lambda spark: unary_op_df(spark, data_gen).select(f.from_utc_timestamp(f.col('a'), time_zone)))
        lambda spark: unary_op_df(spark, timestamp_gen).select(f.to_utc_timestamp(f.col('a'), time_zone)))

@allow_non_gpu('ProjectExec')
@pytest.mark.parametrize('time_zone', unsupported_timezones, ids=idfn)
@pytest.mark.parametrize('data_gen', [timestamp_gen], ids=idfn)
def test_unsupported_fallback_from_utc_timestamp(data_gen):
    time_zone_gen = StringGen(pattern="UTC")
def test_to_utc_timestamp_unsupported_timezone_fallback(data_gen, time_zone):
    assert_gpu_fallback_collect(
        lambda spark: gen_df(spark, [("a", data_gen), ("tzone", time_zone_gen)]).selectExpr(
            "from_utc_timestamp(a, tzone)"),
        'FromUTCTimestamp')
        lambda spark: unary_op_df(spark, data_gen).select(f.to_utc_timestamp(f.col('a'), time_zone)),
        'ToUTCTimestamp')

@allow_non_gpu('ProjectExec')
def test_unsupported_fallback_to_utc_timestamp():
    time_zone_gen = StringGen(pattern="UTC")
    assert_gpu_fallback_collect(
        lambda spark: gen_df(spark, [("a", timestamp_gen), ("tzone", time_zone_gen)]).selectExpr(
            "to_utc_timestamp(a, tzone)"),
        'ToUTCTimestamp')

@allow_non_gpu('ProjectExec')
@pytest.mark.parametrize('data_gen', [long_gen], ids=idfn)
@@ -1,5 +1,5 @@
/*
 * Copyright (c) 2019-2023, NVIDIA CORPORATION.
 * Copyright (c) 2019-2024, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -27,7 +27,7 @@ import com.nvidia.spark.rapids.RapidsPluginImplicits._
import com.nvidia.spark.rapids.jni.GpuTimeZoneDB
import com.nvidia.spark.rapids.shims.ShimBinaryExpression

import org.apache.spark.sql.catalyst.expressions.{BinaryExpression, ExpectsInputTypes, Expression, FromUnixTime, FromUTCTimestamp, ImplicitCastInputTypes, NullIntolerant, TimeZoneAwareExpression}
import org.apache.spark.sql.catalyst.expressions.{BinaryExpression, ExpectsInputTypes, Expression, FromUnixTime, FromUTCTimestamp, ImplicitCastInputTypes, NullIntolerant, TimeZoneAwareExpression, ToUTCTimestamp}
import org.apache.spark.sql.catalyst.util.DateTimeConstants
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
@@ -1132,6 +1132,65 @@ case class GpuFromUTCTimestamp(
  }
}

class ToUTCTimestampExprMeta(
    expr: ToUTCTimestamp,
    override val conf: RapidsConf,
    override val parent: Option[RapidsMeta[_, _, _]],
    rule: DataFromReplacementRule)
  extends BinaryExprMeta[ToUTCTimestamp](expr, conf, parent, rule) {

  private[this] var timezoneId: ZoneId = null

  override def tagExprForGpu(): Unit = {
Review comment: It looks like
Reply: Done.
    extractStringLit(expr.right) match {
      case None =>
        willNotWorkOnGpu("timezone input must be a literal string")
      case Some(timezoneShortID) =>
        if (timezoneShortID != null) {
          timezoneId = GpuTimeZoneDB.getZoneId(timezoneShortID)
          if (!GpuTimeZoneDB.isSupportedTimeZone(timezoneId)) {
            willNotWorkOnGpu(s"Not supported timezone type $timezoneShortID.")
          }
        }
    }
  }

  override def convertToGpu(timestamp: Expression, timezone: Expression): GpuExpression =
    GpuToUTCTimestamp(timestamp, timezone, timezoneId)
}

case class GpuToUTCTimestamp(
    timestamp: Expression, timezone: Expression, zoneId: ZoneId)
  extends GpuBinaryExpressionArgsAnyScalar
    with ImplicitCastInputTypes
    with NullIntolerant {

  override def left: Expression = timestamp
  override def right: Expression = timezone
  override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType, StringType)
  override def dataType: DataType = TimestampType

  override def doColumnar(lhs: GpuColumnVector, rhs: GpuScalar): ColumnVector = {
    if (rhs.getBase.isValid) {
      if (GpuOverrides.isUTCTimezone(zoneId)) {
        // For UTC timezone, just a no-op bypassing GPU computation.
        lhs.getBase.incRefCount()
      } else {
        GpuTimeZoneDB.fromTimestampToUtcTimestamp(lhs.getBase, zoneId)
      }
    } else {
      // All-null output column.
      GpuColumnVector.columnVectorFromNull(lhs.getRowCount.toInt, dataType)
    }
  }

  override def doColumnar(numRows: Int, lhs: GpuScalar, rhs: GpuScalar): ColumnVector = {
    withResource(GpuColumnVector.from(lhs, numRows, left.dataType)) { lhsCol =>
      doColumnar(lhsCol, rhs)
    }
  }
}

trait GpuDateMathBase extends GpuBinaryExpression with ExpectsInputTypes {
  override def inputTypes: Seq[AbstractDataType] =
    Seq(DateType, TypeCollection(IntegerType, ShortType, ByteType))
Review comment: In my understanding it is not a tz_sensitive_test case, because it will only run on the GPU under the UTC timezone and fall back for all other timezones. We can add the timezones we want to test to the supported_timezones list.
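Following that suggestion, a minimal sketch (not part of the diff) of how extra coverage would be added: the parametrized tests above consume these module-level lists, so appending a zone ID to supported_timezones is enough for test_from_utc_timestamp and test_to_utc_timestamp to pick it up, and unsupported_timezones similarly drives the fallback tests. The extra zone below is hypothetical and used only for illustration.

# Any ID appended here becomes one more pytest case for the GPU-vs-CPU comparison tests.
supported_timezones = ["Asia/Shanghai", "UTC", "UTC+0", "UTC-0", "GMT", "GMT+0",
                       "GMT-0", "EST", "MST", "VST",
                       "Asia/Tokyo"]  # hypothetical addition, not part of this PR
# Any ID appended here becomes one more case for the CPU-fallback tests.
unsupported_timezones = ["PST", "NST", "AST", "America/Los_Angeles",
                         "America/New_York", "America/Chicago"]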