diff --git a/integration_tests/src/main/python/conditionals_test.py b/integration_tests/src/main/python/conditionals_test.py
index 48d5a05c099..0370d24da11 100644
--- a/integration_tests/src/main/python/conditionals_test.py
+++ b/integration_tests/src/main/python/conditionals_test.py
@@ -219,6 +219,7 @@ def test_conditional_with_side_effects_col_scalar(data_gen):
 
 @pytest.mark.parametrize('data_gen', [mk_str_gen('[0-9]{1,20}')], ids=idfn)
 @pytest.mark.skipif(not is_jvm_charset_utf8(), reason="regular expressions require UTF-8")
+@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/9992')
 def test_conditional_with_side_effects_cast(data_gen):
     test_conf=copy_and_update(
         ansi_enabled_conf, {'spark.rapids.sql.regexp.enabled': True})
diff --git a/integration_tests/src/main/python/delta_lake_delete_test.py b/integration_tests/src/main/python/delta_lake_delete_test.py
index 413479b3a12..fe2659bf8b7 100644
--- a/integration_tests/src/main/python/delta_lake_delete_test.py
+++ b/integration_tests/src/main/python/delta_lake_delete_test.py
@@ -153,6 +153,7 @@ def generate_dest_data(spark):
 @pytest.mark.parametrize("use_cdf", [True, False], ids=idfn)
 @pytest.mark.parametrize("partition_columns", [None, ["a"]], ids=idfn)
 @pytest.mark.skipif(is_before_spark_320(), reason="Delta Lake writes are not supported before Spark 3.2.x")
+@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/9884')
 def test_delta_delete_rows(spark_tmp_path, use_cdf, partition_columns):
     # Databricks changes the number of files being written, so we cannot compare logs unless there's only one slice
     num_slices_to_test = 1 if is_databricks_runtime() else 10
@@ -171,6 +172,7 @@ def generate_dest_data(spark):
 @pytest.mark.parametrize("use_cdf", [True, False], ids=idfn)
 @pytest.mark.parametrize("partition_columns", [None, ["a"]], ids=idfn)
 @pytest.mark.skipif(is_before_spark_320(), reason="Delta Lake writes are not supported before Spark 3.2.x")
+@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/9884')
 def test_delta_delete_dataframe_api(spark_tmp_path, use_cdf, partition_columns):
     from delta.tables import DeltaTable
     data_path = spark_tmp_path + "/DELTA_DATA"
diff --git a/integration_tests/src/main/python/delta_lake_update_test.py b/integration_tests/src/main/python/delta_lake_update_test.py
index 0fc65658332..b1348fdfe17 100644
--- a/integration_tests/src/main/python/delta_lake_update_test.py
+++ b/integration_tests/src/main/python/delta_lake_update_test.py
@@ -122,6 +122,7 @@ def generate_dest_data(spark):
 @pytest.mark.parametrize("use_cdf", [True, False], ids=idfn)
 @pytest.mark.parametrize("partition_columns", [None, ["a"]], ids=idfn)
 @pytest.mark.skipif(is_before_spark_320(), reason="Delta Lake writes are not supported before Spark 3.2.x")
+@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/9884')
 def test_delta_update_rows(spark_tmp_path, use_cdf, partition_columns):
     # Databricks changes the number of files being written, so we cannot compare logs unless there's only one slice
     num_slices_to_test = 1 if is_databricks_runtime() else 10
diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py
index 0c99fc4516a..6fada82dd37 100644
--- a/integration_tests/src/main/python/hash_aggregate_test.py
+++ b/integration_tests/src/main/python/hash_aggregate_test.py
@@ -396,6 +396,7 @@ def test_hash_reduction_sum(data_gen, conf):
 @pytest.mark.parametrize('data_gen',
     numeric_gens + decimal_gens + [ DecimalGen(precision=38, scale=0), DecimalGen(precision=38, scale=-10)], ids=idfn)
 @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn)
+@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/9779')
 def test_hash_reduction_sum_full_decimal(data_gen, conf):
     assert_gpu_and_cpu_are_equal_collect(
         lambda spark: unary_op_df(spark, data_gen, length=100).selectExpr("SUM(a)"),
diff --git a/integration_tests/src/main/python/schema_evolution_test.py b/integration_tests/src/main/python/schema_evolution_test.py
index d9f4c0f0899..248d915523e 100644
--- a/integration_tests/src/main/python/schema_evolution_test.py
+++ b/integration_tests/src/main/python/schema_evolution_test.py
@@ -16,7 +16,7 @@
 from conftest import is_not_utc
 from data_gen import *
 from datetime import date, datetime, timezone
-from marks import ignore_order
+from marks import ignore_order, datagen_overrides
 import pytest
 from spark_session import is_databricks_runtime, is_databricks113_or_later
 
@@ -63,6 +63,7 @@ def get_ddl(col_gen_pairs):
 @ignore_order(local=True)
 @pytest.mark.parametrize("format", _formats)
 @pytest.mark.xfail(condition = is_not_utc(), reason = 'xfail non-UTC time zone tests because of https://github.com/NVIDIA/spark-rapids/issues/9653')
+@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/9807')
 def test_column_add_after_partition(spark_tmp_table_factory, format):
     # Databricks 10.4 appears to be missing https://issues.apache.org/jira/browse/SPARK-39417
     # so avoid generating nulls for numeric partitions