Change xfail to allow_non_gpu
Signed-off-by: Chong Gao <[email protected]>
Chong Gao committed Nov 28, 2023
1 parent 2eded07 commit 778fa00
Showing 27 changed files with 260 additions and 265 deletions.
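The pattern applied across the changed files is the same: tests that were previously xfail'd under a non-UTC session time zone (tracked by https://github.com/NVIDIA/spark-rapids/issues/9653) are now expected to pass, with the operators that lack non-UTC support permitted to fall back to the CPU via allow_non_gpu. Below is a minimal, self-contained sketch of the before/after pattern; is_not_utc(), the contents of non_utc_allow, and the marker stub are illustrative assumptions, not the repository's actual definitions.

import time
import pytest

def is_not_utc():
    # Hypothetical helper: True when the test process time zone is not UTC.
    return time.strftime('%Z') not in ('UTC', 'GMT')

# Hypothetical contents: operators that may legally run on the CPU when
# the session time zone is not UTC.
non_utc_allow = ['ProjectExec', 'FilterExec'] if is_not_utc() else []

# In the real files, allow_non_gpu comes from the project's `marks` module;
# stubbed here as a plain pytest marker so the sketch runs on its own.
allow_non_gpu = pytest.mark.allow_non_gpu

# Before this commit: the whole test was expected to fail under non-UTC.
@pytest.mark.xfail(condition=is_not_utc(),
                   reason='https://github.com/NVIDIA/spark-rapids/issues/9653')
def test_old_style():
    pass

# After this commit: the test must pass, but operators named in
# non_utc_allow may fall back to the CPU without failing the run.
@allow_non_gpu(*non_utc_allow)
def test_new_style():
    pass

The practical difference: xfail hides real failures (and unexpected passes) behind an expected-failure status, whereas allow_non_gpu keeps the CPU-vs-GPU result comparison active and only relaxes where the plan is allowed to execute.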
3 changes: 1 addition & 2 deletions integration_tests/src/main/python/aqe_test.py
@@ -194,9 +194,8 @@ def do_it(spark):
# broadcast join. The bug currently manifests in Databricks, but could
# theoretically show up in other Spark distributions
@ignore_order(local=True)
-@allow_non_gpu('BroadcastNestedLoopJoinExec', 'Cast', 'DateSub', *db_113_cpu_bnlj_join_allow)
+@allow_non_gpu('BroadcastNestedLoopJoinExec', 'Cast', 'DateSub', *db_113_cpu_bnlj_join_allow, *non_utc_allow)
@pytest.mark.parametrize('join', joins, ids=idfn)
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
def test_aqe_join_reused_exchange_inequality_condition(spark_tmp_path, join):
data_path = spark_tmp_path + '/PARQUET_DATA'
def prep(spark):
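Note the composition in aqe_test.py above: the splatted *non_utc_allow entries extend the operators the test already tolerated on the CPU rather than replacing them. Flattened out (list contents hypothetical):

db_113_cpu_bnlj_join_allow = ['ShuffleExchangeExec']   # hypothetical
non_utc_allow = ['ProjectExec']                        # hypothetical
allowed = ('BroadcastNestedLoopJoinExec', 'Cast', 'DateSub',
           *db_113_cpu_bnlj_join_allow, *non_utc_allow)
# allowed == ('BroadcastNestedLoopJoinExec', 'Cast', 'DateSub',
#             'ShuffleExchangeExec', 'ProjectExec')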
40 changes: 20 additions & 20 deletions integration_tests/src/main/python/cast_test.py
@@ -61,7 +61,7 @@ def test_cast_nested(data_gen, to_type):
assert_gpu_and_cpu_are_equal_collect(
lambda spark : unary_op_df(spark, data_gen).select(f.col('a').cast(to_type)))

-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
@datagen_overrides(seed=0, reason="https://github.com/NVIDIA/spark-rapids/issues/9781")
def test_cast_string_date_valid_format():
# In Spark 3.2.0+ the valid format changed, and we cannot support all of the format.
@@ -91,7 +91,7 @@ def test_cast_string_date_valid_format():
# Spark 320+ and databricks support Ansi mode when casting string to date
# This means an exception will be thrown when casting invalid string to date on Spark 320+ or databricks
# test Spark versions < 3.2.0 and non databricks, ANSI mode
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
@pytest.mark.skipif(not is_before_spark_320(), reason="ansi cast(string as date) throws exception only in 3.2.0+ or db")
def test_cast_string_date_invalid_ansi_before_320():
data_rows = [(v,) for v in values_string_to_data]
@@ -101,7 +101,7 @@ def test_cast_string_date_invalid_ansi_before_320():
'spark.sql.ansi.enabled': 'true'}, )

# test Spark versions >= 320 and databricks, ANSI mode, valid values
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
@pytest.mark.skipif(is_before_spark_320(), reason="Spark versions(< 320) not support Ansi mode when casting string to date")
def test_cast_string_date_valid_ansi():
data_rows = [(v,) for v in valid_values_string_to_date]
@@ -112,7 +112,7 @@ def test_cast_string_date_valid_ansi():

# test Spark versions >= 320, ANSI mode
@pytest.mark.skipif(is_before_spark_320(), reason="ansi cast(string as date) throws exception only in 3.2.0+")
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
@pytest.mark.parametrize('invalid', invalid_values_string_to_date)
def test_cast_string_date_invalid_ansi(invalid):
assert_gpu_and_cpu_error(
@@ -145,7 +145,7 @@ def test_try_cast_fallback_340(invalid):
'spark.sql.ansi.enabled': True})

# test all Spark versions, non ANSI mode, invalid value will be converted to NULL
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
def test_cast_string_date_non_ansi():
data_rows = [(v,) for v in values_string_to_data]
assert_gpu_and_cpu_are_equal_collect(
@@ -157,7 +157,7 @@ def test_cast_string_date_non_ansi():
StringGen('[0-9]{1,4}-[0-3][0-9]-[0-5][0-9][ |T][0-3][0-9]:[0-6][0-9]:[0-6][0-9]'),
StringGen('[0-9]{1,4}-[0-3][0-9]-[0-5][0-9][ |T][0-3][0-9]:[0-6][0-9]:[0-6][0-9].[0-9]{0,6}Z?')],
ids=idfn)
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
def test_cast_string_ts_valid_format(data_gen):
# In Spark 3.2.0+ the valid format changed, and we cannot support all of the format.
# This provides values that are valid in all of those formats.
@@ -305,7 +305,7 @@ def _assert_cast_to_string_equal (data_gen, conf):

@pytest.mark.parametrize('data_gen', all_array_gens_for_cast_to_string, ids=idfn)
@pytest.mark.parametrize('legacy', ['true', 'false'])
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
def test_cast_array_to_string(data_gen, legacy):
_assert_cast_to_string_equal(
data_gen,
@@ -325,7 +325,7 @@ def test_cast_array_with_unmatched_element_to_string(data_gen, legacy):

@pytest.mark.parametrize('data_gen', basic_map_gens_for_cast_to_string, ids=idfn)
@pytest.mark.parametrize('legacy', ['true', 'false'])
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
def test_cast_map_to_string(data_gen, legacy):
_assert_cast_to_string_equal(
data_gen,
@@ -345,7 +345,7 @@ def test_cast_map_with_unmatched_element_to_string(data_gen, legacy):

@pytest.mark.parametrize('data_gen', [StructGen([[str(i), gen] for i, gen in enumerate(basic_array_struct_gens_for_cast_to_string)] + [["map", MapGen(ByteGen(nullable=False), null_gen)]])], ids=idfn)
@pytest.mark.parametrize('legacy', ['true', 'false'])
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
def test_cast_struct_to_string(data_gen, legacy):
_assert_cast_to_string_equal(
data_gen,
@@ -410,7 +410,7 @@ def test_cast_string_to_negative_scale_decimal():
@pytest.mark.skipif(is_before_spark_330(), reason="ansi cast throws exception only in 3.3.0+")
@pytest.mark.parametrize('type', [DoubleType(), FloatType()], ids=idfn)
@pytest.mark.parametrize('invalid_value', [float("inf"), float("-inf"), float("nan")])
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
def test_cast_float_to_timestamp_ansi_for_nan_inf(type, invalid_value):
def fun(spark):
data = [invalid_value]
@@ -422,7 +422,7 @@ def fun(spark):
@pytest.mark.skipif(is_before_spark_330(), reason="ansi cast throws exception only in 3.3.0+")
@pytest.mark.parametrize('type', [DoubleType(), FloatType()], ids=idfn)
@pytest.mark.parametrize('invalid_value', [float(LONG_MAX) + 100, float(LONG_MIN) - 100])
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
def test_cast_float_to_timestamp_ansi_overflow(type, invalid_value):
def fun(spark):
data = [invalid_value]
@@ -431,7 +431,7 @@ def fun(spark):
assert_gpu_and_cpu_error(fun, {"spark.sql.ansi.enabled": True}, "ArithmeticException")

@pytest.mark.skipif(is_before_spark_330(), reason='330+ throws exception in ANSI mode')
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
def test_cast_float_to_timestamp_side_effect():
def getDf(spark):
data = [(True, float(LONG_MAX) + 100), (False, float(1))]
@@ -443,7 +443,7 @@ def getDf(spark):

# non ansi mode, will get null
@pytest.mark.parametrize('type', [DoubleType(), FloatType()], ids=idfn)
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
def test_cast_float_to_timestamp_for_nan_inf(type):
def fun(spark):
data = [(float("inf"),), (float("-inf"),), (float("nan"),)]
@@ -463,7 +463,7 @@ def fun(spark):
short_gen,
int_gen,
long_gen_to_timestamp], ids=idfn)
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
def test_cast_integral_to_timestamp(gen, ansi_enabled):
if(is_before_spark_330() and ansi_enabled): # 330- does not support in ANSI mode
pytest.skip()
@@ -472,7 +472,7 @@ def test_cast_integral_to_timestamp(gen, ansi_enabled):
conf={"spark.sql.ansi.enabled": ansi_enabled})

@pytest.mark.parametrize('ansi_enabled', [True, False], ids=['ANSI_ON', 'ANSI_OFF'])
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
def test_cast_float_to_timestamp(ansi_enabled):
if(is_before_spark_330() and ansi_enabled): # 330- does not support in ANSI mode
pytest.skip()
@@ -482,7 +482,7 @@ def test_cast_float_to_timestamp(ansi_enabled):
conf={"spark.sql.ansi.enabled": ansi_enabled})

@pytest.mark.parametrize('ansi_enabled', [True, False], ids=['ANSI_ON', 'ANSI_OFF'])
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
def test_cast_double_to_timestamp(ansi_enabled):
if (is_before_spark_330() and ansi_enabled): # 330- does not support in ANSI mode
pytest.skip()
@@ -500,7 +500,7 @@ def test_cast_double_to_timestamp(ansi_enabled):
(INT_MIN - 1, IntegerType()),
], ids=idfn)
@pytest.mark.skipif(is_before_spark_330(), reason="Spark 330- does not ansi casting between numeric and timestamp")
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
def test_cast_timestamp_to_integral_ansi_overflow(invalid_and_type):
(invalid, to_type) = invalid_and_type
assert_gpu_and_cpu_error(
@@ -511,7 +511,7 @@ def test_cast_timestamp_to_integral_ansi_overflow(invalid_and_type):
error_message="overflow")

@pytest.mark.skipif(is_before_spark_330(), reason="Spark 330- does not ansi casting between numeric and timestamp")
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
def test_cast_timestamp_to_numeric_ansi_no_overflow():
data = [datetime.fromtimestamp(i) for i in range(BYTE_MIN, BYTE_MAX + 1)]
assert_gpu_and_cpu_are_equal_collect(
@@ -520,14 +520,14 @@ def test_cast_timestamp_to_numeric_ansi_no_overflow():
"cast(value as float)", "cast(value as double)"),
conf=ansi_enabled_conf)

-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
def test_cast_timestamp_to_numeric_non_ansi():
assert_gpu_and_cpu_are_equal_collect(
lambda spark: unary_op_df(spark, timestamp_gen)
.selectExpr("cast(a as byte)", "cast(a as short)", "cast(a as int)", "cast(a as long)",
"cast(a as float)", "cast(a as double)"))

-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
def test_cast_timestamp_to_string():
assert_gpu_and_cpu_are_equal_collect(
lambda spark: unary_op_df(spark, timestamp_gen)
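For readers unfamiliar with the marker, the enforcement is conceptually a plan inspection: after the query runs on the GPU session, any plan node that is not a GPU operator must appear in the allow list. A rough sketch under the assumption that GPU operators are identifiable by a 'Gpu' name prefix (the plugin's real check is more involved):

def assert_plan_on_gpu(plan_node_names, allowed=()):
    # Hypothetical check: every non-'Gpu' node is a CPU fallback and must
    # be explicitly allowed, e.g. via allow_non_gpu(*non_utc_allow).
    unexpected = [n for n in plan_node_names
                  if not n.startswith('Gpu') and n not in allowed]
    if unexpected:
        raise AssertionError('unexpected CPU fallback(s): ' + ', '.join(unexpected))

# Passes: the lone CPU node is explicitly allowed.
assert_plan_on_gpu(['GpuFilter', 'ProjectExec'], allowed=('ProjectExec',))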
4 changes: 2 additions & 2 deletions integration_tests/src/main/python/cmp_test.py
@@ -19,7 +19,7 @@
from data_gen import *
from spark_session import with_cpu_session, is_before_spark_330
from pyspark.sql.types import *
-from marks import datagen_overrides
+from marks import datagen_overrides, allow_non_gpu
import pyspark.sql.functions as f

@pytest.mark.parametrize('data_gen', eq_gens_with_decimal_gen + struct_gens_sample_with_decimal128_no_list, ids=idfn)
@@ -336,7 +336,7 @@ def test_in(data_gen):
# This is to test entries over that value.
@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/9687')
@pytest.mark.parametrize('data_gen', eq_gens_with_decimal_gen, ids=idfn)
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
def test_in_set(data_gen):
# nulls are not supported for in on the GPU yet
num_entries = int(with_cpu_session(lambda spark: spark.conf.get('spark.sql.optimizer.inSetConversionThreshold'))) + 1
12 changes: 7 additions & 5 deletions integration_tests/src/main/python/collection_ops_test.py
@@ -23,6 +23,8 @@
import pyspark.sql.utils
from spark_session import with_cpu_session, with_gpu_session
from conftest import get_datagen_seed
+from marks import allow_non_gpu


nested_gens = [ArrayGen(LongGen()), ArrayGen(decimal_gen_128bit),
StructGen([("a", LongGen()), ("b", decimal_gen_128bit)]),
@@ -251,7 +253,7 @@ def test_sort_array_normalize_nans():
gens in sequence_normal_integral_gens]

@pytest.mark.parametrize('start_gen,stop_gen', sequence_normal_no_step_integral_gens, ids=idfn)
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
def test_sequence_without_step(start_gen, stop_gen):
assert_gpu_and_cpu_are_equal_collect(
lambda spark: two_col_df(spark, start_gen, stop_gen).selectExpr(
@@ -260,7 +262,7 @@ def test_sequence_without_step(start_gen, stop_gen):
"sequence(20, b)"))

@pytest.mark.parametrize('start_gen,stop_gen,step_gen', sequence_normal_integral_gens, ids=idfn)
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
def test_sequence_with_step(start_gen, stop_gen, step_gen):
# Get the datagen seed we use for all datagens, since we need to call start
# on step_gen
@@ -309,7 +311,7 @@ def test_sequence_with_step(start_gen, stop_gen, step_gen):
]

@pytest.mark.parametrize('start_gen,stop_gen,step_gen', sequence_illegal_boundaries_integral_gens, ids=idfn)
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
def test_sequence_illegal_boundaries(start_gen, stop_gen, step_gen):
assert_gpu_and_cpu_error(
lambda spark:three_col_df(spark, start_gen, stop_gen, step_gen).selectExpr(
@@ -324,7 +326,7 @@ def test_sequence_illegal_boundaries(start_gen, stop_gen, step_gen):
]

@pytest.mark.parametrize('stop_gen', sequence_too_long_length_gens, ids=idfn)
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
def test_sequence_too_long_sequence(stop_gen):
assert_gpu_and_cpu_error(
# To avoid OOM, reduce the row number to 1, it is enough to verify this case.
@@ -366,7 +368,7 @@ def get_sequence_data(gen, len):
mixed_schema)

# test for 3 cases mixed in a single dataset
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
def test_sequence_with_step_mixed_cases():
assert_gpu_and_cpu_are_equal_collect(
lambda spark: get_sequence_cases_mixed_df(spark)
6 changes: 3 additions & 3 deletions integration_tests/src/main/python/conditionals_test.py
@@ -19,7 +19,7 @@
from data_gen import *
from spark_session import is_before_spark_320, is_jvm_charset_utf8
from pyspark.sql.types import *
-from marks import datagen_overrides
+from marks import datagen_overrides, allow_non_gpu
import pyspark.sql.functions as f

def mk_str_gen(pattern):
@@ -233,7 +233,7 @@ def test_conditional_with_side_effects_case_when(data_gen):
conf = test_conf)

@pytest.mark.parametrize('data_gen', [mk_str_gen('[a-z]{0,3}')], ids=idfn)
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
def test_conditional_with_side_effects_sequence(data_gen):
assert_gpu_and_cpu_are_equal_collect(
lambda spark : unary_op_df(spark, data_gen).selectExpr(
@@ -244,7 +244,7 @@

@pytest.mark.skipif(is_before_spark_320(), reason='Earlier versions of Spark cannot cast sequence to string')
@pytest.mark.parametrize('data_gen', [mk_str_gen('[a-z]{0,3}')], ids=idfn)
-@pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@allow_non_gpu(*non_utc_allow)
def test_conditional_with_side_effects_sequence_cast(data_gen):
assert_gpu_and_cpu_are_equal_collect(
lambda spark : unary_op_df(spark, data_gen).selectExpr(
(The remaining 22 changed files are not shown here.)
