From e2d3fb95e39c65039a250d5b4e9e0e6940f5241b Mon Sep 17 00:00:00 2001
From: timl
Date: Thu, 5 Dec 2024 16:17:26 +0800
Subject: [PATCH] Balance the pre-merge CI job's time for the ci_1 and ci_2 tests

To fix: https://github.com/NVIDIA/spark-rapids/issues/11825

The pre-merge CI job is divided into CI_1 (mvn_verify) and CI_2. We run
these two parts in parallel to speed up the pre-merge CI.

Currently, CI_1 takes about 2 hours, while CI_2 takes approximately 4
hours. Mark some tests as CI_1 to balance the time between CI_1 and CI_2.

After re-marking the tests, both the CI_1 and CI_2 jobs should finish in
about 3 hours.

Signed-off-by: timl
---
 integration_tests/src/main/python/join_test.py            | 3 ++-
 integration_tests/src/main/python/json_test.py            | 3 +++
 integration_tests/src/main/python/parquet_test.py         | 2 ++
 integration_tests/src/main/python/window_function_test.py | 3 +++
 4 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/integration_tests/src/main/python/join_test.py b/integration_tests/src/main/python/join_test.py
index 936310bedeb..96021421d62 100644
--- a/integration_tests/src/main/python/join_test.py
+++ b/integration_tests/src/main/python/join_test.py
@@ -22,7 +22,8 @@
 from marks import ignore_order, allow_non_gpu, incompat, validate_execs_in_gpu_plan
 from spark_session import with_cpu_session, is_before_spark_330, is_databricks_runtime
 
-pytestmark = [pytest.mark.nightly_resource_consuming_test]
+# mark this test as ci_1 for mvn verify sanity check in pre-merge CI
+pytestmark = [pytest.mark.nightly_resource_consuming_test, pytest.mark.premerge_ci_1]
 
 all_non_sized_join_types = ['LeftSemi', 'LeftAnti', 'Cross']
 all_symmetric_sized_join_types = ['Inner', 'FullOuter']
diff --git a/integration_tests/src/main/python/json_test.py b/integration_tests/src/main/python/json_test.py
index 6e8165846e7..b825975f398 100644
--- a/integration_tests/src/main/python/json_test.py
+++ b/integration_tests/src/main/python/json_test.py
@@ -23,6 +23,9 @@
 from marks import approximate_float, allow_non_gpu, ignore_order, datagen_overrides
 from spark_session import *
 
+# mark this test as ci_1 for mvn verify sanity check in pre-merge CI
+pytestmark = [pytest.mark.premerge_ci_1]
+
 TEXT_INPUT_EXEC='FileSourceScanExec'
 
 # allow non gpu when time zone is non-UTC because of https://github.com/NVIDIA/spark-rapids/issues/9653'
diff --git a/integration_tests/src/main/python/parquet_test.py b/integration_tests/src/main/python/parquet_test.py
index 6aa234003ba..a43d48e5ce9 100644
--- a/integration_tests/src/main/python/parquet_test.py
+++ b/integration_tests/src/main/python/parquet_test.py
@@ -28,6 +28,8 @@
 from spark_session import *
 from conftest import is_databricks_runtime, is_dataproc_runtime
 
+# mark this test as ci_1 for mvn verify sanity check in pre-merge CI
+pytestmark = [pytest.mark.premerge_ci_1]
 
 def read_parquet_df(data_path):
     return lambda spark : spark.read.parquet(data_path)
diff --git a/integration_tests/src/main/python/window_function_test.py b/integration_tests/src/main/python/window_function_test.py
index 653eaffa940..d6b51342213 100644
--- a/integration_tests/src/main/python/window_function_test.py
+++ b/integration_tests/src/main/python/window_function_test.py
@@ -24,6 +24,9 @@
 from spark_session import is_before_spark_320, is_databricks113_or_later, is_databricks133_or_later, is_spark_350_or_later, spark_version, with_cpu_session
 import warnings
 
+# mark this test as ci_1 for mvn verify sanity check in pre-merge CI
+pytestmark = [pytest.mark.premerge_ci_1]
+
 _grpkey_longs_with_no_nulls = [
     ('a', RepeatSeqGen(LongGen(nullable=False), length=20)),
     ('b', IntegerGen()),
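
Note on how the marker works: a custom marker such as premerge_ci_1 has to be
registered with pytest and is then used to partition the test modules between
the two pre-merge jobs. The snippet below is a minimal, illustrative sketch of
that wiring, not the actual spark-rapids conftest or pre-merge script; the
marker description text and the selection command lines are assumptions.

    # conftest.py -- illustrative sketch only; the real registration lives in
    # the spark-rapids integration-test harness
    def pytest_configure(config):
        # Register the marker so runs with --strict-markers accept it and
        # "-m premerge_ci_1" can select the CI_1 modules.
        config.addinivalue_line(
            "markers",
            "premerge_ci_1: module runs in the CI_1 (mvn_verify) pre-merge job")

With module-level pytestmark assignments like the ones added in this patch,
the CI_1 job can run pytest -m premerge_ci_1 while CI_2 runs
pytest -m "not premerge_ci_1", so moving a module's marker shifts that
module's entire runtime from one job to the other.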