From 45cdac34667638b4d29e0ec5aab663d2588e3f26 Mon Sep 17 00:00:00 2001
From: MithunR
Date: Mon, 9 Dec 2024 10:10:03 -0800
Subject: [PATCH] Fix for lead/lag window test failures. (#11823)

Fixes #11807.

`test_lead_lag_for_structs_with_arrays` in `window_function_test` fails
intermittently because of non-deterministic data ordering.

Window function tests are sensitive to data ordering. With certain values of
DATAGEN_SEED, there are repeated values of partitioning/ordering keys, causing
the window function to return different values on CPU and GPU.

This commit fixes the test so that the ordering is deterministic.

Signed-off-by: MithunR
---
 .../src/main/python/window_function_test.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/integration_tests/src/main/python/window_function_test.py b/integration_tests/src/main/python/window_function_test.py
index 653eaffa940..7695c1adc9d 100644
--- a/integration_tests/src/main/python/window_function_test.py
+++ b/integration_tests/src/main/python/window_function_test.py
@@ -971,14 +971,12 @@ def do_it(spark):
 def test_lead_lag_for_structs_with_arrays(a_b_gen, struct_gen):
     data_gen = [
         ('a', RepeatSeqGen(a_b_gen, length=20)),
-        ('b', IntegerGen(nullable=False, special_cases=[])),
+        ('b', UniqueLongGen(nullable=False)),
         ('c', struct_gen)]
-    # By default for many operations a range of unbounded to unbounded is used
-    # This will not work until https://github.com/NVIDIA/spark-rapids/issues/216
-    # is fixed.
+    # For many operations, a range of unbounded to unbounded is used by default.
 
-    # Ordering needs to include c because with nulls and especially on booleans
-    # it is possible to get a different ordering when it is ambiguous.
+    # Ordering needs to include `b` because with nulls and especially on booleans,
+    # it is possible to get a different result when the ordering is ambiguous.
     base_window_spec = Window.partitionBy('a').orderBy('b')
 
     def do_it(spark):