[WIP] Move timezone check to each operator [databricks] #9482

Closed

Changes shown from 36 of the 42 commits.

Commits:
d8e77b2  Add test cases for timezone awarded operators (Oct 19, 2023)
3f781a4  Move timezone check to each operator (Oct 19, 2023)
d5a6d7a  Merge branch 23.12 (Oct 27, 2023)
b3fa3ee  Update (Oct 27, 2023)
c31b2e3  debug (Oct 27, 2023)
a7c8996  debug (Oct 27, 2023)
2878c5c  Add timezone test mark (Oct 27, 2023)
705f8b5  Minor update (Nov 1, 2023)
882b751  Fix failed cmp case on Spark311; Restore a python import; minor changes (Nov 1, 2023)
aec893c  Fix failure on Databricks (Nov 2, 2023)
7f81644  Update test cases for Databricks (Nov 2, 2023)
bcc1f5b  Update test cases for Databricks (Nov 2, 2023)
505b72e  Fix delta lake test cases. (Nov 3, 2023)
07942ea  Fix delta lake test cases. (Nov 3, 2023)
3033bc3  Remove the skip logic when time zone is not UTC (Nov 7, 2023)
a852455  Add time zone config to set non-UTC (Nov 7, 2023)
0358cd4  Add fallback case for cast_test.py (Nov 7, 2023)
f6ccadd  Add fallback case for cast_test.py (Nov 7, 2023)
21d5a69  Add fallback case for cast_test.py (Nov 8, 2023)
e2aa9da  Add fallback case for cast_test.py (Nov 8, 2023)
9eab476  Update split_list (Nov 8, 2023)
e231a80  Add fallback case for cast_test.py (Nov 8, 2023)
71928a0  Add fallback case for cast_test.py (Nov 8, 2023)
ca23932  Add fallback cases for cmp_test.py (Nov 9, 2023)
ee60bea  Add fallback tests for json_test.py (firestarman, Nov 9, 2023)
d403c59  add non_utc fallback for parquet_write qa_select and window_function … (thirtiseven, Nov 9, 2023)
dd5ad0b  Add fallback tests for conditionals_test.py (winningsix, Nov 9, 2023)
058e13e  Add fallback cases for collection_ops_test.py (Nov 9, 2023)
fc3a678  add fallback tests for date_time_test (thirtiseven, Nov 9, 2023)
938c649  clean up spark_session.py (thirtiseven, Nov 9, 2023)
befa39d  Add fallback tests for explain_test and csv_test (winningsix, Nov 9, 2023)
cf2c621  Update test case (Nov 9, 2023)
c298d5f  update test case (Nov 9, 2023)
09e772c  Add default value (Nov 10, 2023)
f43a8f9  Remove useless is_tz_utc (Nov 10, 2023)
5882cc3  Fix fallback cases (Nov 10, 2023)
7a53dc2  Add bottom check for time zone; Fix ORC check (Nov 13, 2023)
7bd9ef8  By default, ExecCheck do not check UTC time zone (Nov 13, 2023)
9817c4e  For common expr like AttributeReference, just skip the UTC checking (Nov 13, 2023)
f8505b7  For common expr like AttributeReference, just skip the UTC checking (Nov 13, 2023)
fa1c84d  For common expr like AttributeReference, just skip the UTC checking (Nov 13, 2023)
fbbbd5b  Update test cases (Nov 14, 2023)
9 changes: 6 additions & 3 deletions integration_tests/run_pyspark_from_build.sh
@@ -223,11 +223,14 @@ else
     export PYSP_TEST_spark_jars="${ALL_JARS//:/,}"
 fi
 
+# time zone will be tested
+TEST_TZ=${TEST_TZ:-UTC}
+
 # Set the Delta log cache size to prevent the driver from caching every Delta log indefinitely
-export PYSP_TEST_spark_driver_extraJavaOptions="-ea -Duser.timezone=UTC -Ddelta.log.cacheSize=10 $COVERAGE_SUBMIT_FLAGS"
-export PYSP_TEST_spark_executor_extraJavaOptions='-ea -Duser.timezone=UTC'
+export PYSP_TEST_spark_driver_extraJavaOptions="-ea -Duser.timezone=$TEST_TZ -Ddelta.log.cacheSize=10 $COVERAGE_SUBMIT_FLAGS"
+export PYSP_TEST_spark_executor_extraJavaOptions="-ea -Duser.timezone=$TEST_TZ"
 export PYSP_TEST_spark_ui_showConsoleProgress='false'
-export PYSP_TEST_spark_sql_session_timeZone='UTC'
+export PYSP_TEST_spark_sql_session_timeZone=$TEST_TZ
 export PYSP_TEST_spark_sql_shuffle_partitions='4'
 # prevent cluster shape to change
 export PYSP_TEST_spark_dynamicAllocation_enabled='false'
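With this change the test time zone is driven by the TEST_TZ environment variable and defaults to UTC, so existing jobs keep their old behavior. To exercise the plugin under a non-UTC zone, a run would look something like `TEST_TZ=Asia/Shanghai ./integration_tests/run_pyspark_from_build.sh`; the zone id here is only an example, and any valid Java time zone id should work. Note that TEST_TZ now feeds both the JVM `-Duser.timezone` options and `spark.sql.session.timeZone`, keeping the driver, executors, and SQL session consistent.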
3 changes: 2 additions & 1 deletion integration_tests/src/main/python/aqe_test.py
@@ -17,7 +17,7 @@
 from pyspark.sql.types import *
 from asserts import assert_gpu_and_cpu_are_equal_collect, assert_cpu_and_gpu_are_equal_collect_with_capture
 from data_gen import *
-from marks import ignore_order, allow_non_gpu
+from marks import ignore_order, allow_non_gpu, disable_timezone_test
 from spark_session import with_cpu_session, is_databricks113_or_later
 
 _adaptive_conf = { "spark.sql.adaptive.enabled": "true" }
@@ -195,6 +195,7 @@ def do_it(spark):
 @ignore_order(local=True)
 @allow_non_gpu('BroadcastNestedLoopJoinExec', 'Cast', 'DateSub', *db_113_cpu_bnlj_join_allow)
 @pytest.mark.parametrize('join', joins, ids=idfn)
+@disable_timezone_test
 def test_aqe_join_reused_exchange_inequality_condition(spark_tmp_path, join):
     data_path = spark_tmp_path + '/PARQUET_DATA'
     def prep(spark):
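The new `disable_timezone_test` mark is imported from `marks`, whose definition is not part of this excerpt. As a rough sketch of the usual pattern for such marks (an assumption about the shape, not the PR's actual definition):

import pytest

# Sketch only: the real definition lives in marks.py and is not shown in
# this diff. Custom pytest marks are typically declared as simple aliases,
# so tagged tests can be deselected when running under a non-UTC TEST_TZ.
disable_timezone_test = pytest.mark.disable_timezone_test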
58 changes: 58 additions & 0 deletions integration_tests/src/main/python/asserts.py
@@ -213,6 +213,21 @@ def bring_back(spark):
             return (df.collect(), df)
         collect_type = 'COLLECT'
         return (bring_back, collect_type)
+    elif mode == "COLLECT_ERROR_WITH_DATAFRAME":
+        def bring_back(spark):
+            """
+            Return (collect_error, df). collect_error is the stringified
+            exception from collect(), or an empty string if no error occurred.
+            """
+            df = limit_func(spark)
+            collect_error = ""
+            try:
+                df.collect()
+            except Exception as e:
+                collect_error = str(e)
+            return (collect_error, df)
+        collect_type = 'COLLECT'
+        return (bring_back, collect_type)
     else:
         bring_back = lambda spark: limit_func(spark).toLocalIterator()
         collect_type = 'ITERATOR'
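The new COLLECT_ERROR_WITH_DATAFRAME mode collects the dataframe, swallows any exception, and hands back the stringified error together with the dataframe, so callers can assert on the error message and still capture the executed plan. A minimal standalone sketch of the pattern (the helper name is illustrative, not from this PR):

def collect_error_and_df(df):
    # Empty string means collect() succeeded; otherwise it holds the message.
    collect_error = ""
    try:
        df.collect()
    except Exception as e:
        collect_error = str(e)  # keep the message so callers can match on it
    return (collect_error, df)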
@@ -444,6 +459,30 @@ def assert_gpu_fallback_collect(func,
 
     assert_equal(from_cpu, from_gpu)
 
+def assert_gpu_fallback_and_collect_with_error(func,
+                                               cpu_fallback_class_name,
+                                               error_message,
+                                               conf={}):
+    (bring_back, collect_type) = _prep_func_for_compare(func, 'COLLECT_ERROR_WITH_DATAFRAME')
+    conf = _prep_incompat_conf(conf)
+
+    print('### CPU RUN ###')
+    cpu_start = time.time()
+    collect_error, cpu_df = with_cpu_session(bring_back, conf=conf)
+    assert error_message in collect_error, f"Expected error '{error_message}' did not appear in '{collect_error}'"
+    cpu_end = time.time()
+
+    print('### GPU RUN ###')
+    gpu_start = time.time()
+    collect_error, gpu_df = with_gpu_session(bring_back, conf=conf)
+    assert error_message in collect_error, f"Expected error '{error_message}' did not appear in '{collect_error}'"
+    gpu_end = time.time()
+    jvm = spark_jvm()
+    jvm.org.apache.spark.sql.rapids.ExecutionPlanCaptureCallback.assertDidFallBack(gpu_df._jdf, cpu_fallback_class_name)
+    print('### {}: GPU TOOK {} CPU TOOK {} ###'.format(collect_type,
+        gpu_end - gpu_start, cpu_end - cpu_start))
+
+
 def assert_gpu_sql_fallback_collect(df_fun, cpu_fallback_class_name, table_name, sql, conf=None, debug=False):
     if conf is None:
         conf = {}
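assert_gpu_fallback_and_collect_with_error runs the query on both CPU and GPU, checks that the expected error message shows up in both runs, and then verifies that the GPU plan fell back for the given class. A hypothetical call might look like this (the data generator, fallback class, error text, and conf below are illustrative, not taken from this PR):

# Hypothetical usage: unary_op_df/timestamp_gen are the integration suite's
# data_gen helpers; the class name and error text are examples only.
assert_gpu_fallback_and_collect_with_error(
    lambda spark: unary_op_df(spark, timestamp_gen).selectExpr('cast(a as string)'),
    cpu_fallback_class_name='Cast',
    error_message='IllegalArgumentException',
    conf={'spark.sql.session.timeZone': 'Asia/Shanghai'})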
@@ -622,6 +661,25 @@ def do_it_all(spark):
         return spark.sql(sql)
     assert_gpu_and_cpu_are_equal_collect(do_it_all, conf, is_cpu_first=is_cpu_first)
 
+def assert_gpu_fallback_sql(df_fun, table_name, sql, fallback_class_name, conf=None):
+    """
+    Assert that the specified SQL query falls back to the CPU for the given class, and that CPU and GPU results match.
+    :param df_fun: a function that will create the dataframe
+    :param table_name: name of the table to be created from the dataframe
+    :param sql: SQL query to be run against the table
+    :param fallback_class_name: name of the class the GPU plan must fall back to
+    :param conf: any user-specified confs; empty by default
+    :return: assertion failure if the results from CPU and GPU do not match
+    """
+    if conf is None:
+        conf = {}
+    def do_it_all(spark):
+        df = df_fun(spark)
+        df.createOrReplaceTempView(table_name)
+        return spark.sql(sql)
+    assert_gpu_fallback_collect(do_it_all, fallback_class_name, conf)
+
+
 def assert_spark_exception(func, error_message):
     """
     Assert that a specific Java exception is thrown
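assert_gpu_fallback_sql is the SQL-flavored wrapper over assert_gpu_fallback_collect: it registers the dataframe as a temp view, runs the query, and asserts both the fallback and the CPU/GPU result match. A hypothetical call (the generator, SQL, and fallback class are examples, not from this PR):

# Hypothetical usage: hour() on timestamps is session-time-zone sensitive,
# so a non-UTC zone is a plausible reason for this expression to fall back.
assert_gpu_fallback_sql(
    lambda spark: unary_op_df(spark, timestamp_gen),
    table_name='tab',
    sql='SELECT hour(a) FROM tab',
    fallback_class_name='Hour',
    conf={'spark.sql.session.timeZone': 'Asia/Shanghai'})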