From 2c04c3e991a56ffca2158c6261567c64cd55d305 Mon Sep 17 00:00:00 2001 From: Haoyang Li Date: Thu, 25 Apr 2024 10:30:14 +0800 Subject: [PATCH] Use fixed seed for some random failed tests (#10739) * set fixed seed for some random failed tests Signed-off-by: Haoyang Li * add import Signed-off-by: Haoyang Li --------- Signed-off-by: Haoyang Li --- integration_tests/src/main/python/dpp_test.py | 3 ++- integration_tests/src/main/python/hash_aggregate_test.py | 5 +++++ integration_tests/src/main/python/regexp_test.py | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/integration_tests/src/main/python/dpp_test.py b/integration_tests/src/main/python/dpp_test.py index c9f0eadab1a..cd4610cf95c 100644 --- a/integration_tests/src/main/python/dpp_test.py +++ b/integration_tests/src/main/python/dpp_test.py @@ -19,7 +19,7 @@ from asserts import assert_cpu_and_gpu_are_equal_collect_with_capture, assert_gpu_and_cpu_are_equal_collect from conftest import spark_tmp_table_factory from data_gen import * -from marks import ignore_order, allow_non_gpu +from marks import ignore_order, allow_non_gpu, datagen_overrides from spark_session import is_before_spark_320, with_cpu_session, is_before_spark_312, is_databricks_runtime, is_databricks113_or_later # non-positive values here can produce a degenerative join, so here we ensure that most values are @@ -171,6 +171,7 @@ def fn(spark): # When BroadcastExchangeExec is available on filtering side, and it can be reused: # DynamicPruningExpression(InSubqueryExec(value, GpuSubqueryBroadcastExec))) @ignore_order +@datagen_overrides(seed=0, reason="https://github.com/NVIDIA/spark-rapids/issues/10147") @pytest.mark.parametrize('store_format', ['parquet', 'orc'], ids=idfn) @pytest.mark.parametrize('s_index', list(range(len(_statements))), ids=idfn) @pytest.mark.parametrize('aqe_enabled', [ diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py index 5d2bb7d658d..4b28574b677 100644 --- a/integration_tests/src/main/python/hash_aggregate_test.py +++ b/integration_tests/src/main/python/hash_aggregate_test.py @@ -917,6 +917,7 @@ def exact_percentile_reduction(df): 'percentile(val, array(0, 0.0001, 0.5, 0.9999, 1), abs(freq))' ) +@datagen_overrides(seed=0, reason="https://github.com/NVIDIA/spark-rapids/issues/10233") @pytest.mark.parametrize('data_gen', exact_percentile_reduction_data_gen, ids=idfn) def test_exact_percentile_reduction(data_gen): assert_gpu_and_cpu_are_equal_collect( @@ -993,6 +994,7 @@ def exact_percentile_groupby(df): ) @ignore_order +@datagen_overrides(seed=0, reason="https://github.com/NVIDIA/spark-rapids/issues/10719") @pytest.mark.parametrize('data_gen', exact_percentile_groupby_data_gen, ids=idfn) def test_exact_percentile_groupby(data_gen): assert_gpu_and_cpu_are_equal_collect( @@ -1010,6 +1012,7 @@ def test_exact_percentile_groupby(data_gen): @allow_non_gpu('ObjectHashAggregateExec', 'SortAggregateExec', 'ShuffleExchangeExec', 'HashPartitioning', 'AggregateExpression', 'Alias', 'Cast', 'Literal', 'ProjectExec', 'Percentile') +@datagen_overrides(seed=0, reason="https://github.com/NVIDIA/spark-rapids/issues/10738") @pytest.mark.parametrize('data_gen', exact_percentile_groupby_cpu_fallback_data_gen, ids=idfn) @pytest.mark.parametrize('replace_mode', ['partial', 'final|complete'], ids=idfn) @pytest.mark.parametrize('use_obj_hash_agg', ['false', 'true'], ids=idfn) @@ -1080,6 +1083,7 @@ def test_hash_multiple_mode_query(data_gen, conf): @approximate_float @ignore_order @incompat +@datagen_overrides(seed=0, reason="https://github.com/NVIDIA/spark-rapids/issues/10234") @pytest.mark.parametrize('data_gen', _init_list, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) @@ -1093,6 +1097,7 @@ def test_hash_multiple_mode_query_avg_distincts(data_gen, conf): @approximate_float @ignore_order @incompat +@datagen_overrides(seed=0, reason="https://github.com/NVIDIA/spark-rapids/issues/10388") @pytest.mark.parametrize('data_gen', _init_list, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_query_multiple_distincts_with_non_distinct(data_gen, conf): diff --git a/integration_tests/src/main/python/regexp_test.py b/integration_tests/src/main/python/regexp_test.py index ff47d0020f3..e14a465d8e0 100644 --- a/integration_tests/src/main/python/regexp_test.py +++ b/integration_tests/src/main/python/regexp_test.py @@ -563,6 +563,7 @@ def test_character_classes(): ), conf=_regexp_conf) +@datagen_overrides(seed=0, reason="https://github.com/NVIDIA/spark-rapids/issues/10641") def test_regexp_choice(): gen = mk_str_gen('[abcd]{1,3}[0-9]{1,3}[abcd]{1,3}[ \n\t\r]{0,2}') assert_gpu_and_cpu_are_equal_collect(