From e2d3fb95e39c65039a250d5b4e9e0e6940f5241b Mon Sep 17 00:00:00 2001
From: timl
Date: Thu, 5 Dec 2024 16:17:26 +0800
Subject: [PATCH] Balance the pre-merge CI job's time for the ci_1 and ci_2 tests

To fix: https://github.com/NVIDIA/spark-rapids/issues/11825

The pre-merge CI job is divided into CI_1 (mvn_verify) and CI_2. We run
these two parts in parallel to speed up the pre-merge CI.

Currently, CI_1 takes about 2 hours, while CI_2 takes approximately 4
hours. Mark some tests as CI_1 to balance the time between CI_1 and CI_2.

After re-marking the tests, both the CI_1 and CI_2 jobs should finish in
about 3 hours.

Signed-off-by: timl
---
 integration_tests/src/main/python/join_test.py            | 3 ++-
 integration_tests/src/main/python/json_test.py            | 3 +++
 integration_tests/src/main/python/parquet_test.py         | 2 ++
 integration_tests/src/main/python/window_function_test.py | 3 +++
 4 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/integration_tests/src/main/python/join_test.py b/integration_tests/src/main/python/join_test.py
index 936310bedeb..96021421d62 100644
--- a/integration_tests/src/main/python/join_test.py
+++ b/integration_tests/src/main/python/join_test.py
@@ -22,7 +22,8 @@
 from marks import ignore_order, allow_non_gpu, incompat, validate_execs_in_gpu_plan
 from spark_session import with_cpu_session, is_before_spark_330, is_databricks_runtime
 
-pytestmark = [pytest.mark.nightly_resource_consuming_test]
+# mark this test as ci_1 for mvn verify sanity check in pre-merge CI
+pytestmark = [pytest.mark.nightly_resource_consuming_test, pytest.mark.premerge_ci_1]
 
 all_non_sized_join_types = ['LeftSemi', 'LeftAnti', 'Cross']
 all_symmetric_sized_join_types = ['Inner', 'FullOuter']
diff --git a/integration_tests/src/main/python/json_test.py b/integration_tests/src/main/python/json_test.py
index 6e8165846e7..b825975f398 100644
--- a/integration_tests/src/main/python/json_test.py
+++ b/integration_tests/src/main/python/json_test.py
@@ -23,6 +23,9 @@
 from marks import approximate_float, allow_non_gpu, ignore_order, datagen_overrides
 from spark_session import *
 
+# mark this test as ci_1 for mvn verify sanity check in pre-merge CI
+pytestmark = [pytest.mark.premerge_ci_1]
+
 TEXT_INPUT_EXEC='FileSourceScanExec'
 
 # allow non gpu when time zone is non-UTC because of https://github.com/NVIDIA/spark-rapids/issues/9653'
diff --git a/integration_tests/src/main/python/parquet_test.py b/integration_tests/src/main/python/parquet_test.py
index 6aa234003ba..a43d48e5ce9 100644
--- a/integration_tests/src/main/python/parquet_test.py
+++ b/integration_tests/src/main/python/parquet_test.py
@@ -28,6 +28,8 @@
 from spark_session import *
 from conftest import is_databricks_runtime, is_dataproc_runtime
 
+# mark this test as ci_1 for mvn verify sanity check in pre-merge CI
+pytestmark = [pytest.mark.premerge_ci_1]
 
 def read_parquet_df(data_path):
     return lambda spark : spark.read.parquet(data_path)
diff --git a/integration_tests/src/main/python/window_function_test.py b/integration_tests/src/main/python/window_function_test.py
index 653eaffa940..d6b51342213 100644
--- a/integration_tests/src/main/python/window_function_test.py
+++ b/integration_tests/src/main/python/window_function_test.py
@@ -24,6 +24,9 @@
 from spark_session import is_before_spark_320, is_databricks113_or_later, is_databricks133_or_later, is_spark_350_or_later, spark_version, with_cpu_session
 import warnings
 
+# mark this test as ci_1 for mvn verify sanity check in pre-merge CI
+pytestmark = [pytest.mark.premerge_ci_1]
+
 _grpkey_longs_with_no_nulls = [
     ('a', RepeatSeqGen(LongGen(nullable=False), length=20)),
     ('b', IntegerGen()),
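
Note on how the marker works: a custom marker such as premerge_ci_1 has to be
registered with pytest and is then used to partition the test modules between
the two pre-merge jobs. The snippet below is a minimal, illustrative sketch of
that wiring, not the actual spark-rapids conftest or pre-merge script; the
marker description text and the selection command lines are assumptions.

    # conftest.py -- illustrative sketch only; the real registration lives in
    # the spark-rapids integration-test harness
    def pytest_configure(config):
        # Register the marker so runs with --strict-markers accept it and
        # "-m premerge_ci_1" can select the CI_1 modules.
        config.addinivalue_line(
            "markers",
            "premerge_ci_1: module runs in the CI_1 (mvn_verify) pre-merge job")

With module-level pytestmark assignments like the ones added in this patch,
the CI_1 job can run pytest -m premerge_ci_1 while CI_2 runs
pytest -m "not premerge_ci_1", so moving a module's marker shifts that
module's entire runtime from one job to the other.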