From 743aef2212d77ec1eed3fc2e974ddf856be82acd Mon Sep 17 00:00:00 2001
From: Navin Kumar
Date: Wed, 3 Jan 2024 14:02:06 -0800
Subject: [PATCH] Add test for to_timestamp and add special cases for
 Asia/Shanghai transition times

Signed-off-by: Navin Kumar
---
 .../src/main/python/date_time_test.py | 28 ++++++++++++++++---
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/integration_tests/src/main/python/date_time_test.py b/integration_tests/src/main/python/date_time_test.py
index 5e94acc77c3..abba8b1fecd 100644
--- a/integration_tests/src/main/python/date_time_test.py
+++ b/integration_tests/src/main/python/date_time_test.py
@@ -14,7 +14,7 @@
 import pytest
 
 from asserts import assert_gpu_and_cpu_are_equal_collect, assert_gpu_fallback_collect, assert_gpu_and_cpu_error
-from conftest import is_utc, is_supported_time_zone
+from conftest import is_utc, is_supported_time_zone, get_test_tz
 from data_gen import *
 from datetime import date, datetime, timezone
 from marks import ignore_order, incompat, allow_non_gpu, datagen_overrides, tz_sensitive_test
@@ -394,7 +394,7 @@ def test_unix_timestamp(data_gen, ansi_enabled):
         (StringGen('[0-9]{4}/[01][12]/[0-2][1-8]'),'yyyy/MM/dd'),
         (StringGen('[01][12]/[0-2][1-8]'), 'MM/dd'),
         (StringGen('[0-2][1-8]/[01][12]'), 'dd/MM'),
-        (ConvertGen(DateGen(nullable=False), lambda d: d.strftime('%Y/%m').zfill(7), data_type=StringType()), 'yyyy/MM')]
+        (ConvertGen(DateGen(nullable=False), lambda d: d.strftime('%Y/%m').zfill(7), data_type=StringType()), 'yyyy/MM')]
 
 # get invalid date string df
 def invalid_date_string_df(spark):
@@ -428,16 +428,36 @@ def test_string_unix_timestamp_ansi_exception():
             error_message="Exception",
             conf=ansi_enabled_conf)
 
-@pytest.mark.parametrize("ansi_enabled", [True, False], ids=['ANSI_ON', 'ANSI_OFF'])
 @tz_sensitive_test
+@pytest.mark.parametrize('parser_policy', ["CORRECTED", "EXCEPTION"], ids=idfn)
+def test_to_timestamp(parser_policy):
+    gen = StringGen("[0-9]{3}[1-9]-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]) ([0-1][0-9]|2[0-3]):[0-5][0-9]:[0-5][0-9]")
+    if get_test_tz() == "Asia/Shanghai":
+        # ensure some times around transition are tested
+        gen = gen.with_special_case("1991-04-14 02:00:00")\
+            .with_special_case("1991-04-14 02:30:00")\
+            .with_special_case("1991-04-14 03:00:00")\
+            .with_special_case("1991-09-15 02:00:00")\
+            .with_special_case("1991-09-15 02:30:00")\
+            .with_special_case("1991-09-15 03:00:00")
+    assert_gpu_and_cpu_are_equal_collect(
+        lambda spark : unary_op_df(spark, gen)
+            .select(f.col("a"), f.to_timestamp(f.col("a"), "yyyy-MM-dd HH:mm:ss")),
+        { "spark.sql.legacy.timeParserPolicy": parser_policy})
+
+
+@tz_sensitive_test
+@pytest.mark.skipif(not is_supported_time_zone(), reason="not all time zones are supported now, refer to https://github.com/NVIDIA/spark-rapids/issues/6839, please update after all time zones are supported")
+@pytest.mark.parametrize("ansi_enabled", [True, False], ids=['ANSI_ON', 'ANSI_OFF'])
 def test_to_date(ansi_enabled):
     assert_gpu_and_cpu_are_equal_collect(
         lambda spark : unary_op_df(spark, date_gen)
             .select(f.to_date(f.col("a").cast('string'), "yyyy-MM-dd")),
         {'spark.sql.ansi.enabled': ansi_enabled})
 
-@pytest.mark.parametrize('data_gen', [StringGen('0[1-9][0-9]{4}')], ids=idfn)
 @tz_sensitive_test
+@pytest.mark.skipif(not is_supported_time_zone(), reason="not all time zones are supported now, refer to https://github.com/NVIDIA/spark-rapids/issues/6839, please update after all time zones are supported")
+@pytest.mark.parametrize('data_gen', [StringGen('0[1-9][0-9]{4}')], ids=idfn)
 def test_to_date_format_MMyyyy(data_gen):
     assert_gpu_and_cpu_are_equal_collect(
         lambda spark: unary_op_df(spark, data_gen).select(f.to_date(f.col("a"), "MMyyyy")))
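
Note on the special cases above: China observed daylight saving time through 1991, and the hard-coded strings sit around the 1991-04-14 spring-forward and 1991-09-15 fall-back transitions in Asia/Shanghai, which is where CPU and GPU time zone handling are most likely to diverge. The snippet below is a minimal standalone sketch, not part of the patch, that feeds the same boundary strings to to_timestamp in a plain local PySpark session; the local[*] master, app name, and DataFrame setup are illustrative assumptions rather than anything from the plugin's test harness.

    from pyspark.sql import SparkSession
    import pyspark.sql.functions as f

    # Plain CPU-only Spark session pinned to the time zone the test targets.
    spark = (SparkSession.builder
             .master("local[*]")                                    # assumption: local run
             .appName("shanghai-transition-check")                  # hypothetical app name
             .config("spark.sql.session.timeZone", "Asia/Shanghai")
             .getOrCreate())

    # The same boundary wall-clock times used as special cases in the patch.
    boundary_times = [("1991-04-14 02:00:00",), ("1991-04-14 02:30:00",), ("1991-04-14 03:00:00",),
                      ("1991-09-15 02:00:00",), ("1991-09-15 02:30:00",), ("1991-09-15 03:00:00",)]
    df = spark.createDataFrame(boundary_times, ["a"])

    # Times around 02:00-03:00 on these dates sit next to the DST gap/overlap, so the
    # parsed instants depend on how the zone rules are applied -- exactly the kind of
    # disagreement assert_gpu_and_cpu_are_equal_collect is meant to surface.
    df.select(f.col("a"),
              f.to_timestamp(f.col("a"), "yyyy-MM-dd HH:mm:ss").alias("ts")).show(truncate=False)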