diff --git a/integration_tests/src/main/python/cast_test.py b/integration_tests/src/main/python/cast_test.py
index 9fb4650836c..a79f1a76d03 100644
--- a/integration_tests/src/main/python/cast_test.py
+++ b/integration_tests/src/main/python/cast_test.py
@@ -205,6 +205,7 @@ def test_ansi_cast_decimal_to(data_gen, to_type):
         conf = {'spark.rapids.sql.castDecimalToFloat.enabled': True,
             'spark.sql.ansi.enabled': True})
 
+@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/10050')
 @pytest.mark.parametrize('data_gen', [
     DecimalGen(7, 1),
     DecimalGen(9, 9),
diff --git a/integration_tests/src/main/python/date_time_test.py b/integration_tests/src/main/python/date_time_test.py
index 8b0ff3e5c68..6e4154d0346 100644
--- a/integration_tests/src/main/python/date_time_test.py
+++ b/integration_tests/src/main/python/date_time_test.py
@@ -17,7 +17,7 @@
 from conftest import is_utc, is_supported_time_zone
 from data_gen import *
 from datetime import date, datetime, timezone
-from marks import ignore_order, incompat, allow_non_gpu
+from marks import ignore_order, incompat, allow_non_gpu, datagen_overrides
 from pyspark.sql.types import *
 from spark_session import with_cpu_session, is_before_spark_330, is_before_spark_350
 import pyspark.sql.functions as f
@@ -207,6 +207,7 @@ def test_datesub(data_gen):
 # than -106032829 for date('0001-01-01') so we have to cap the days values to the lower upper and
 # lower ranges.
 to_unix_timestamp_days_gen=[ByteGen(), ShortGen(), IntegerGen(min_val=-106032829, max_val=103819094, special_cases=[-106032829, 103819094,0,1,-1])]
+@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/10027')
 @pytest.mark.parametrize('data_gen', to_unix_timestamp_days_gen, ids=idfn)
 @incompat
 @allow_non_gpu(*non_utc_allow)
@@ -221,6 +222,7 @@ def test_dateadd_with_date_overflow(data_gen):
             'unix_timestamp(date_add(a, cast(24 as {})))'.format(string_type)))
 
 to_unix_timestamp_days_gen=[ByteGen(), ShortGen(), IntegerGen(max_val=106032829, min_val=-103819094, special_cases=[106032829, -103819094,0,1,-1])]
+@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/10027')
 @pytest.mark.parametrize('data_gen', to_unix_timestamp_days_gen, ids=idfn)
 @incompat
 @allow_non_gpu(*non_utc_allow)
diff --git a/integration_tests/src/main/python/delta_lake_update_test.py b/integration_tests/src/main/python/delta_lake_update_test.py
index b1348fdfe17..dbba39ebab9 100644
--- a/integration_tests/src/main/python/delta_lake_update_test.py
+++ b/integration_tests/src/main/python/delta_lake_update_test.py
@@ -142,6 +142,7 @@ def generate_dest_data(spark):
 @pytest.mark.parametrize("partition_columns", [None, ["a"]], ids=idfn)
 @pytest.mark.parametrize("enable_deletion_vectors", [True, False], ids=idfn)
 @pytest.mark.skipif(not supports_delta_lake_deletion_vectors(), reason="Deletion vectors are new in Spark 3.4.0 / DBR 12.2")
+@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/10025')
 def test_delta_update_rows_with_dv(spark_tmp_path, use_cdf, partition_columns, enable_deletion_vectors):
     # Databricks changes the number of files being written, so we cannot compare logs unless there's only one slice
     num_slices_to_test = 1 if is_databricks_runtime() else 10
@@ -160,6 +161,7 @@ def generate_dest_data(spark):
 @pytest.mark.parametrize("use_cdf", [True, False], ids=idfn)
 @pytest.mark.parametrize("partition_columns", [None, ["a"]], ids=idfn)
 @pytest.mark.skipif(is_before_spark_320(), reason="Delta Lake writes are not supported before Spark 3.2.x")
+@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/10025')
 def test_delta_update_dataframe_api(spark_tmp_path, use_cdf, partition_columns):
     from delta.tables import DeltaTable
     data_path = spark_tmp_path + "/DELTA_DATA"
diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py
index 83591043f48..631ac8e71b1 100644
--- a/integration_tests/src/main/python/hash_aggregate_test.py
+++ b/integration_tests/src/main/python/hash_aggregate_test.py
@@ -1168,6 +1168,7 @@ def test_hash_multiple_filters(data_gen, conf):
         'min(a), max(b) filter (where c > 250) from hash_agg_table group by a',
         conf)
 
+@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/10026')
 @approximate_float
 @ignore_order
 @pytest.mark.parametrize('data_gen', [_grpkey_floats_with_nan_zero_grouping_keys,
diff --git a/integration_tests/src/main/python/orc_cast_test.py b/integration_tests/src/main/python/orc_cast_test.py
index 48efd5c8174..48dbad54e51 100644
--- a/integration_tests/src/main/python/orc_cast_test.py
+++ b/integration_tests/src/main/python/orc_cast_test.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 from asserts import assert_gpu_and_cpu_are_equal_collect, assert_gpu_and_cpu_error
 from conftest import is_not_utc
 from data_gen import *
-from marks import allow_non_gpu
+from marks import allow_non_gpu, datagen_overrides
 from pyspark.sql.types import *
 from spark_session import with_cpu_session
 from orc_test import reader_opt_confs
@@ -103,6 +103,7 @@ def test_casting_from_float_and_double(spark_tmp_path, to_type):
     )
 
+@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/10017')
 @pytest.mark.parametrize('data_gen', [DoubleGen(max_exp=32, special_cases=None),
                                       DoubleGen(max_exp=32, special_cases=[8.88e9, 9.99e10, 1.314e11])])
 @allow_non_gpu(*non_utc_allow_orc_scan)