From cb31afb07847ff96b16d70ceec54ee1426fe5e64 Mon Sep 17 00:00:00 2001
From: "Robert (Bobby) Evans"
Date: Mon, 2 Dec 2024 18:19:17 -0600
Subject: [PATCH] Fall back to CPU for non-UTC months_between (#11802)

Signed-off-by: Robert (Bobby) Evans
---
 integration_tests/src/main/python/date_time_test.py | 12 ++++++------
 .../spark/sql/rapids/datetimeExpressions.scala      | 10 +++++++++-
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/integration_tests/src/main/python/date_time_test.py b/integration_tests/src/main/python/date_time_test.py
index 5a98e06fadc..1a7024dac85 100644
--- a/integration_tests/src/main/python/date_time_test.py
+++ b/integration_tests/src/main/python/date_time_test.py
@@ -139,34 +139,34 @@ def test_datediff(data_gen):
 
 hms_fallback = ['ProjectExec'] if not is_supported_time_zone() else []
 
-@allow_non_gpu(*hms_fallback)
+@allow_non_gpu(*non_utc_tz_allow)
 def test_months_between():
     assert_gpu_and_cpu_are_equal_collect(
         lambda spark : binary_op_df(spark, timestamp_gen).selectExpr('months_between(a, b, false)'))
 
-@allow_non_gpu(*hms_fallback)
+@allow_non_gpu(*non_utc_tz_allow)
 def test_months_between_first_day():
     assert_gpu_and_cpu_are_equal_collect(
         lambda spark : unary_op_df(spark, timestamp_gen).selectExpr('months_between(a, timestamp"2024-01-01", false)'))
 
-@allow_non_gpu(*hms_fallback)
+@allow_non_gpu(*non_utc_tz_allow)
 def test_months_between_last_day():
     assert_gpu_and_cpu_are_equal_collect(
         lambda spark : unary_op_df(spark, timestamp_gen).selectExpr('months_between(a, timestamp"2023-12-31", false)'))
 
-@allow_non_gpu(*hms_fallback)
+@allow_non_gpu(*non_utc_tz_allow)
 @approximate_float()
 def test_months_between_round():
     assert_gpu_and_cpu_are_equal_collect(
         lambda spark : binary_op_df(spark, timestamp_gen).selectExpr('months_between(a, b, true)'))
 
-@allow_non_gpu(*hms_fallback)
+@allow_non_gpu(*non_utc_tz_allow)
 @approximate_float()
 def test_months_between_first_day_round():
     assert_gpu_and_cpu_are_equal_collect(
         lambda spark : unary_op_df(spark, timestamp_gen).selectExpr('months_between(a, timestamp"2024-01-01", true)'))
 
-@allow_non_gpu(*hms_fallback)
+@allow_non_gpu(*non_utc_tz_allow)
 @approximate_float()
 def test_months_between_last_day_round():
     assert_gpu_and_cpu_are_equal_collect(
         lambda spark : unary_op_df(spark, timestamp_gen).selectExpr('months_between(a, timestamp"2023-12-31", true)'))
diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/datetimeExpressions.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/datetimeExpressions.scala
index 8ed4c50ac3b..0f382a7b6e6 100644
--- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/datetimeExpressions.scala
+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/datetimeExpressions.scala
@@ -1217,7 +1217,8 @@ class MonthsBetweenExprMeta(expr: MonthsBetween,
     rule: DataFromReplacementRule)
   extends ExprMeta[MonthsBetween](expr, conf, parent, rule) {
 
-  override def isTimeZoneSupported = true
+  // See https://github.com/NVIDIA/spark-rapids/issues/11800
+  override def isTimeZoneSupported = false
 
   override def convertToGpu(): GpuExpression = {
     val gpuChildren = childExprs.map(_.convertToGpu())
@@ -1287,6 +1288,13 @@ object GpuMonthsBetween {
   private def calcSecondsInDay(converted: ColumnVector): ColumnVector = {
     // Find the number of seconds that are not counted for in a day
+    // Rounding down to the start of the day only works in a time zone with no
+    // transition rules. This is because if a transition happens between the start
+    // of the day and the timestamp, the result will be off by the transition
+    // amount. To support other time zones, this will need to change to take the
+    // time zone into account when calculating the offset.
+    // https://github.com/NVIDIA/spark-rapids/issues/11800
+
     // find the micros over by finding the part that is not days
     val microsInDay = withResource(converted.dateTimeFloor(DateTimeRoundingFrequency.DAY)) { days =>
      // But we cannot subtract timestamps directly. They are both micros
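The new comment in calcSecondsInDay is the heart of this change: flooring a timestamp to the day only yields the elapsed time since midnight when the zone's UTC offset never changes. Below is a minimal, self-contained sketch of that failure mode. It is not part of this patch: it uses plain java.time, and America/Los_Angeles plus a 2024 spring-forward date are chosen purely for illustration.

    import java.time.{Duration, ZoneId, ZonedDateTime}

    object DstFloorSketch {
      def main(args: Array[String]): Unit = {
        val zone = ZoneId.of("America/Los_Angeles")
        // 2024-03-10 is a spring-forward day in this zone: 02:00 jumps to
        // 03:00, so the civil day is only 23 hours of real time.
        val ts = ZonedDateTime.of(2024, 3, 10, 12, 0, 0, 0, zone)

        // Fixed-offset view: floor the wall-clock time to midnight and
        // subtract. This is the answer a transition-unaware day-floor gives.
        val local = ts.toLocalDateTime
        val naive = Duration.between(local.toLocalDate.atStartOfDay, local).getSeconds

        // Zone-aware view: real seconds elapsed since this zone's midnight.
        val midnight = ts.toLocalDate.atStartOfDay(zone)
        val real = Duration.between(midnight, ts).getSeconds

        println(s"naive=$naive real=$real") // naive=43200 real=39600
      }
    }

The hour of discrepancy is exactly the transition the comment warns about. Because the GPU day-floor used here does not consult the zone's transition rules, isTimeZoneSupported is now false, so MonthsBetween is tagged for CPU fallback in non-UTC sessions until issue #11800 is resolved.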