From a956cb56ac67638e47b3af368a2e78e75f9f9a1a Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Wed, 27 Sep 2023 13:05:48 -0700 Subject: [PATCH 1/8] part_and_order_gens with nans --- integration_tests/src/main/python/array_test.py | 1 + integration_tests/src/main/python/window_function_test.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/integration_tests/src/main/python/array_test.py b/integration_tests/src/main/python/array_test.py index 6cbe4382338..99b68ccfba1 100644 --- a/integration_tests/src/main/python/array_test.py +++ b/integration_tests/src/main/python/array_test.py @@ -82,6 +82,7 @@ DecimalGen(precision=12, scale=2, nullable=False), DecimalGen(precision=20, scale=2, nullable=False)] +# This non-nans version is only used for Spark version < 3.1.3 no_neg_zero_all_basic_gens_no_nans = [byte_gen, short_gen, int_gen, long_gen, # -0.0 cannot work because of -0.0 == 0.0 in cudf for distinct FloatGen(special_cases=[], no_nans=True), diff --git a/integration_tests/src/main/python/window_function_test.py b/integration_tests/src/main/python/window_function_test.py index 5a5347c70bf..e70df9fd770 100644 --- a/integration_tests/src/main/python/window_function_test.py +++ b/integration_tests/src/main/python/window_function_test.py @@ -134,7 +134,7 @@ ('a', IntegerGen()), ('b', LongGen(nullable=True))] -part_and_order_gens = [long_gen, DoubleGen(no_nans=True, special_cases=[]), +part_and_order_gens = [long_gen, DoubleGen(special_cases=[]), string_gen, boolean_gen, timestamp_gen, DecimalGen(precision=18, scale=1), DecimalGen(precision=38, scale=1)] From 8c4895573720412ea46d0cfb99fad74b2ae316ae Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Wed, 27 Sep 2023 13:08:23 -0700 Subject: [PATCH 2/8] running_part_and_order_gens --- integration_tests/src/main/python/window_function_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_tests/src/main/python/window_function_test.py b/integration_tests/src/main/python/window_function_test.py index e70df9fd770..a606c87a2e8 100644 --- a/integration_tests/src/main/python/window_function_test.py +++ b/integration_tests/src/main/python/window_function_test.py @@ -138,7 +138,7 @@ string_gen, boolean_gen, timestamp_gen, DecimalGen(precision=18, scale=1), DecimalGen(precision=38, scale=1)] -running_part_and_order_gens = [long_gen, DoubleGen(no_nans=True, special_cases=[]), +running_part_and_order_gens = [long_gen, DoubleGen(special_cases=[]), string_gen, byte_gen, timestamp_gen, DecimalGen(precision=18, scale=1), DecimalGen(precision=38, scale=1)] From 5d23523a8d506db73ed74503f703b73bbb427a19 Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Wed, 27 Sep 2023 13:11:16 -0700 Subject: [PATCH 3/8] lead_lag_data_gens --- integration_tests/src/main/python/window_function_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_tests/src/main/python/window_function_test.py b/integration_tests/src/main/python/window_function_test.py index a606c87a2e8..b4708b89668 100644 --- a/integration_tests/src/main/python/window_function_test.py +++ b/integration_tests/src/main/python/window_function_test.py @@ -142,7 +142,7 @@ string_gen, byte_gen, timestamp_gen, DecimalGen(precision=18, scale=1), DecimalGen(precision=38, scale=1)] -lead_lag_data_gens = [long_gen, DoubleGen(no_nans=True, special_cases=[]), +lead_lag_data_gens = [long_gen, DoubleGen(special_cases=[]), boolean_gen, timestamp_gen, string_gen, DecimalGen(precision=18, scale=3), DecimalGen(precision=38, scale=4), StructGen(children=[ From 
9e399b14ea5009e0fb68e574da3c2bfebd812f34 Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Wed, 27 Sep 2023 17:26:41 -0700 Subject: [PATCH 4/8] removed no_nans gens --- integration_tests/src/main/python/data_gen.py | 4 -- .../src/main/python/hash_aggregate_test.py | 63 +++++++------------ .../src/main/python/hashing_test.py | 2 +- 3 files changed, 23 insertions(+), 46 deletions(-) diff --git a/integration_tests/src/main/python/data_gen.py b/integration_tests/src/main/python/data_gen.py index 9f549adfa46..2a9d7e5e6f0 100644 --- a/integration_tests/src/main/python/data_gen.py +++ b/integration_tests/src/main/python/data_gen.py @@ -1067,12 +1067,8 @@ def gen_scalars_for_sql(data_gen, count, seed=0, force_no_nulls=False): # all of the basic types in a single struct all_basic_struct_gen = StructGen([['child'+str(ind), sub_gen] for ind, sub_gen in enumerate(all_basic_gens)]) -all_basic_struct_gen_no_nan = StructGen([['child'+str(ind), sub_gen] for ind, sub_gen in enumerate(all_basic_gens_no_nan)]) - struct_array_gen = StructGen([['child'+str(ind), sub_gen] for ind, sub_gen in enumerate(single_level_array_gens)]) -struct_array_gen_no_nans = StructGen([['child'+str(ind), sub_gen] for ind, sub_gen in enumerate(single_level_array_gens_no_nan)]) - # Some struct gens, but not all because of nesting nonempty_struct_gens_sample = [all_basic_struct_gen, StructGen([['child0', byte_gen], ['child1', all_basic_struct_gen]]), diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py index 288cf3ebc07..e4c8434dc7c 100644 --- a/integration_tests/src/main/python/hash_aggregate_test.py +++ b/integration_tests/src/main/python/hash_aggregate_test.py @@ -167,19 +167,8 @@ _grpkey_floats_with_nulls_and_nans ] -# List of schemas with no NaNs -_init_list_no_nans = [ - _longs_with_nulls, - _longs_with_no_nulls, - _grpkey_longs_with_nulls, - _grpkey_dbls_with_nulls, - _grpkey_floats_with_nulls, - _grpkey_strings_with_nulls, - _grpkey_nulls, - _grpkey_strings_with_extra_nulls] - # List of schemas with NaNs included -_init_list_with_nans_and_no_nans = [ +_init_list_with_nans = [ _longs_with_nulls, _longs_with_no_nulls, _grpkey_longs_with_nulls, @@ -197,7 +186,7 @@ ('b', DecimalGen(nullable=False)), ('c', DecimalGen(nullable=False))] -_init_list_with_nans_and_no_nans_with_decimals = _init_list_with_nans_and_no_nans + [ +_init_list_with_nans_with_decimals = _init_list_with_nans + [ _decimals_with_nulls, _decimals_with_no_nulls] # Used to test ANSI-mode fallback @@ -303,15 +292,7 @@ def get_params(init_list, marked_params=[]): ('c', _decimal_gen_sum_38_neg10)] -_init_list_no_nans_with_decimal = _init_list_no_nans + [ - _grpkey_small_decimals] - -_init_list_no_nans_with_decimalbig = _init_list_no_nans + [ - _grpkey_small_decimals, _grpkey_big_decimals, _grpkey_short_mid_decimals, - _grpkey_short_big_decimals, _grpkey_short_very_big_decimals, - _grpkey_short_very_big_neg_scale_decimals] - -_init_list_with_nans_and_no_nans_with_decimalbig = _init_list_with_nans_and_no_nans + [ +_init_list_with_nans_with_decimalbig = _init_list_with_nans + [ _grpkey_small_decimals, _grpkey_big_decimals, _grpkey_short_mid_decimals, _grpkey_short_big_decimals, _grpkey_short_very_big_decimals, _grpkey_short_very_big_neg_scale_decimals] @@ -378,7 +359,7 @@ def test_computation_in_grpby_columns(): @approximate_float @ignore_order @incompat -@pytest.mark.parametrize('data_gen', _init_list_no_nans_with_decimalbig, ids=idfn) +@pytest.mark.parametrize('data_gen', 
_init_list_with_nans_with_decimalbig, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_grpby_sum(data_gen, conf): assert_gpu_and_cpu_are_equal_collect( @@ -420,7 +401,7 @@ def test_hash_reduction_sum_full_decimal(data_gen, conf): @approximate_float @ignore_order @incompat -@pytest.mark.parametrize('data_gen', _init_list_with_nans_and_no_nans + [_grpkey_short_mid_decimals, +@pytest.mark.parametrize('data_gen', _init_list_with_nans + [_grpkey_short_mid_decimals, _grpkey_short_big_decimals, _grpkey_short_very_big_decimals, _grpkey_short_sum_full_decimals], ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_grpby_avg(data_gen, conf): @@ -451,7 +432,7 @@ def test_hash_avg_nulls_partial_only(data_gen): @approximate_float @ignore_order @incompat -@pytest.mark.parametrize('data_gen', _init_list_no_nans_with_decimalbig, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list_with_nans_with_decimalbig, ids=idfn) def test_intersectAll(data_gen): assert_gpu_and_cpu_are_equal_collect( lambda spark : gen_df(spark, data_gen, length=100).intersectAll(gen_df(spark, data_gen, length=100))) @@ -459,7 +440,7 @@ def test_intersectAll(data_gen): @approximate_float @ignore_order @incompat -@pytest.mark.parametrize('data_gen', _init_list_no_nans_with_decimalbig, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list_with_nans_with_decimalbig, ids=idfn) def test_exceptAll(data_gen): assert_gpu_and_cpu_are_equal_collect( lambda spark : gen_df(spark, data_gen, length=100).exceptAll(gen_df(spark, data_gen, length=100).filter('a != b'))) @@ -477,7 +458,7 @@ def test_exceptAll(data_gen): ('b', _pivot_gen_128bit), ('c', decimal_gen_128bit)] -_pivot_gens_with_decimals = _init_list_with_nans_and_no_nans + [ +_pivot_gens_with_decimals = _init_list_with_nans + [ _grpkey_small_decimals, _pivot_big_decimals, _grpkey_short_mid_decimals, _pivot_short_big_decimals, _grpkey_short_very_big_decimals, _grpkey_short_very_big_neg_scale_decimals] @@ -497,9 +478,9 @@ def test_hash_grpby_pivot(data_gen, conf): @approximate_float @ignore_order(local=True) @incompat -@pytest.mark.parametrize('data_gen', _init_list_no_nans, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list_with_nans, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) -def test_hash_grpby_pivot_without_nans(data_gen, conf): +def test_hash_grpby_pivot_with_nans(data_gen, conf): assert_gpu_and_cpu_are_equal_collect( lambda spark: gen_df(spark, data_gen, length=100) .groupby('a') @@ -510,7 +491,7 @@ def test_hash_grpby_pivot_without_nans(data_gen, conf): @approximate_float @ignore_order(local=True) @incompat -@pytest.mark.parametrize('data_gen', _init_list_with_nans_and_no_nans, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list_with_nans, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_multiple_grpby_pivot(data_gen, conf): assert_gpu_and_cpu_are_equal_collect( @@ -523,9 +504,9 @@ def test_hash_multiple_grpby_pivot(data_gen, conf): @approximate_float @ignore_order(local=True) @incompat -@pytest.mark.parametrize('data_gen', _init_list_no_nans, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list_with_nans, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) -def test_hash_reduction_pivot_without_nans(data_gen, conf): +def test_hash_reduction_pivot_with_nans(data_gen, 
conf): assert_gpu_and_cpu_are_equal_collect( lambda spark: gen_df(spark, data_gen, length=100) .groupby() @@ -536,7 +517,7 @@ def test_hash_reduction_pivot_without_nans(data_gen, conf): @approximate_float @ignore_order(local=True) @incompat -@pytest.mark.parametrize('data_gen', _init_list_with_nans_and_no_nans, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list_with_nans, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_reduction_pivot_with_nans(data_gen, conf): assert_gpu_and_cpu_are_equal_collect( @@ -943,7 +924,7 @@ def test_hash_groupby_typed_imperative_agg_without_gpu_implementation_fallback() @approximate_float @ignore_order @incompat -@pytest.mark.parametrize('data_gen', _init_list_no_nans, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list_with_nans, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_multiple_mode_query(data_gen, conf): print_params(data_gen) @@ -965,7 +946,7 @@ def test_hash_multiple_mode_query(data_gen, conf): @approximate_float @ignore_order @incompat -@pytest.mark.parametrize('data_gen', _init_list_no_nans, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list_with_nans, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_multiple_mode_query_avg_distincts(data_gen, conf): @@ -978,7 +959,7 @@ def test_hash_multiple_mode_query_avg_distincts(data_gen, conf): @approximate_float @ignore_order @incompat -@pytest.mark.parametrize('data_gen', _init_list_no_nans, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list_with_nans, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_query_multiple_distincts_with_non_distinct(data_gen, conf): local_conf = copy_and_update(conf, {'spark.sql.legacy.allowParameterlessCount': 'true'}) @@ -1001,7 +982,7 @@ def test_hash_query_multiple_distincts_with_non_distinct(data_gen, conf): @approximate_float @ignore_order @incompat -@pytest.mark.parametrize('data_gen', _init_list_no_nans, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list_with_nans, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_query_max_with_multiple_distincts(data_gen, conf): local_conf = copy_and_update(conf, {'spark.sql.legacy.allowParameterlessCount': 'true'}) @@ -1015,7 +996,7 @@ def test_hash_query_max_with_multiple_distincts(data_gen, conf): conf=local_conf) @ignore_order -@pytest.mark.parametrize('data_gen', _init_list_no_nans, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list_with_nans, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_count_with_filter(data_gen, conf): assert_gpu_and_cpu_are_equal_collect( @@ -1027,7 +1008,7 @@ def test_hash_count_with_filter(data_gen, conf): @approximate_float @ignore_order @incompat -@pytest.mark.parametrize('data_gen', _init_list_no_nans + [_grpkey_short_mid_decimals, _grpkey_short_big_decimals], ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list_with_nans + [_grpkey_short_mid_decimals, _grpkey_short_big_decimals], ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_multiple_filters(data_gen, conf): assert_gpu_and_cpu_are_equal_sql( @@ -1784,7 +1765,7 @@ def do_it(spark): @ignore_order(local=True) @approximate_float @incompat 
-@pytest.mark.parametrize('data_gen', _init_list_with_nans_and_no_nans_with_decimals, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list_with_nans_with_decimals, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_std_variance(data_gen, conf): local_conf = copy_and_update(conf, { @@ -1850,7 +1831,7 @@ def test_std_variance_nulls(data_gen, conf, ansi_enabled): 'StddevPop', 'StddevSamp', 'VariancePop', 'VarianceSamp', 'SortArray', 'Alias', 'Literal', 'Count', 'AggregateExpression', 'ProjectExec') -@pytest.mark.parametrize('data_gen', _init_list_with_nans_and_no_nans, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list_with_nans, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) @pytest.mark.parametrize('replace_mode', _replace_modes_non_distinct, ids=idfn) @pytest.mark.parametrize('aqe_enabled', ['false', 'true'], ids=idfn) diff --git a/integration_tests/src/main/python/hashing_test.py b/integration_tests/src/main/python/hashing_test.py index 107c3a4576e..6bd56da933d 100644 --- a/integration_tests/src/main/python/hashing_test.py +++ b/integration_tests/src/main/python/hashing_test.py @@ -39,7 +39,7 @@ _xxhash_fallback_gens = single_level_array_gens + nested_array_gens_sample + [ all_basic_struct_gen, - struct_array_gen_no_nans, + struct_array_gen, _struct_of_xxhash_gens] if is_before_spark_320(): _xxhash_fallback_gens += [float_gen, double_gen] From 606b7a2d98a45669a751e529096e25ad7fd9615c Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Wed, 27 Sep 2023 17:30:18 -0700 Subject: [PATCH 5/8] renamed generator from with_nans_with_decimal* to with_nan_and_decimal* --- .../src/main/python/hash_aggregate_test.py | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py index e4c8434dc7c..f7db575ce69 100644 --- a/integration_tests/src/main/python/hash_aggregate_test.py +++ b/integration_tests/src/main/python/hash_aggregate_test.py @@ -168,7 +168,7 @@ ] # List of schemas with NaNs included -_init_list_with_nans = [ +_init_list = [ _longs_with_nulls, _longs_with_no_nulls, _grpkey_longs_with_nulls, @@ -186,7 +186,7 @@ ('b', DecimalGen(nullable=False)), ('c', DecimalGen(nullable=False))] -_init_list_with_nans_with_decimals = _init_list_with_nans + [ +_init_list_with_decimals = _init_list + [ _decimals_with_nulls, _decimals_with_no_nulls] # Used to test ANSI-mode fallback @@ -292,7 +292,7 @@ def get_params(init_list, marked_params=[]): ('c', _decimal_gen_sum_38_neg10)] -_init_list_with_nans_with_decimalbig = _init_list_with_nans + [ +_init_list_with_decimalbig = _init_list + [ _grpkey_small_decimals, _grpkey_big_decimals, _grpkey_short_mid_decimals, _grpkey_short_big_decimals, _grpkey_short_very_big_decimals, _grpkey_short_very_big_neg_scale_decimals] @@ -359,7 +359,7 @@ def test_computation_in_grpby_columns(): @approximate_float @ignore_order @incompat -@pytest.mark.parametrize('data_gen', _init_list_with_nans_with_decimalbig, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list_with_decimalbig, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_grpby_sum(data_gen, conf): assert_gpu_and_cpu_are_equal_collect( @@ -401,7 +401,7 @@ def test_hash_reduction_sum_full_decimal(data_gen, conf): @approximate_float @ignore_order @incompat -@pytest.mark.parametrize('data_gen', 
_init_list_with_nans + [_grpkey_short_mid_decimals, +@pytest.mark.parametrize('data_gen', _init_list + [_grpkey_short_mid_decimals, _grpkey_short_big_decimals, _grpkey_short_very_big_decimals, _grpkey_short_sum_full_decimals], ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_grpby_avg(data_gen, conf): @@ -432,7 +432,7 @@ def test_hash_avg_nulls_partial_only(data_gen): @approximate_float @ignore_order @incompat -@pytest.mark.parametrize('data_gen', _init_list_with_nans_with_decimalbig, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list_with_decimalbig, ids=idfn) def test_intersectAll(data_gen): assert_gpu_and_cpu_are_equal_collect( lambda spark : gen_df(spark, data_gen, length=100).intersectAll(gen_df(spark, data_gen, length=100))) @@ -440,7 +440,7 @@ def test_intersectAll(data_gen): @approximate_float @ignore_order @incompat -@pytest.mark.parametrize('data_gen', _init_list_with_nans_with_decimalbig, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list_with_decimalbig, ids=idfn) def test_exceptAll(data_gen): assert_gpu_and_cpu_are_equal_collect( lambda spark : gen_df(spark, data_gen, length=100).exceptAll(gen_df(spark, data_gen, length=100).filter('a != b'))) @@ -458,7 +458,7 @@ def test_exceptAll(data_gen): ('b', _pivot_gen_128bit), ('c', decimal_gen_128bit)] -_pivot_gens_with_decimals = _init_list_with_nans + [ +_pivot_gens_with_decimals = _init_list + [ _grpkey_small_decimals, _pivot_big_decimals, _grpkey_short_mid_decimals, _pivot_short_big_decimals, _grpkey_short_very_big_decimals, _grpkey_short_very_big_neg_scale_decimals] @@ -478,7 +478,7 @@ def test_hash_grpby_pivot(data_gen, conf): @approximate_float @ignore_order(local=True) @incompat -@pytest.mark.parametrize('data_gen', _init_list_with_nans, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_grpby_pivot_with_nans(data_gen, conf): assert_gpu_and_cpu_are_equal_collect( @@ -491,7 +491,7 @@ def test_hash_grpby_pivot_with_nans(data_gen, conf): @approximate_float @ignore_order(local=True) @incompat -@pytest.mark.parametrize('data_gen', _init_list_with_nans, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_multiple_grpby_pivot(data_gen, conf): assert_gpu_and_cpu_are_equal_collect( @@ -504,7 +504,7 @@ def test_hash_multiple_grpby_pivot(data_gen, conf): @approximate_float @ignore_order(local=True) @incompat -@pytest.mark.parametrize('data_gen', _init_list_with_nans, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_reduction_pivot_with_nans(data_gen, conf): assert_gpu_and_cpu_are_equal_collect( @@ -517,7 +517,7 @@ def test_hash_reduction_pivot_with_nans(data_gen, conf): @approximate_float @ignore_order(local=True) @incompat -@pytest.mark.parametrize('data_gen', _init_list_with_nans, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_reduction_pivot_with_nans(data_gen, conf): assert_gpu_and_cpu_are_equal_collect( @@ -924,7 +924,7 @@ def test_hash_groupby_typed_imperative_agg_without_gpu_implementation_fallback() @approximate_float @ignore_order @incompat 
-@pytest.mark.parametrize('data_gen', _init_list_with_nans, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_multiple_mode_query(data_gen, conf): print_params(data_gen) @@ -946,7 +946,7 @@ def test_hash_multiple_mode_query(data_gen, conf): @approximate_float @ignore_order @incompat -@pytest.mark.parametrize('data_gen', _init_list_with_nans, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_multiple_mode_query_avg_distincts(data_gen, conf): @@ -959,7 +959,7 @@ def test_hash_multiple_mode_query_avg_distincts(data_gen, conf): @approximate_float @ignore_order @incompat -@pytest.mark.parametrize('data_gen', _init_list_with_nans, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_query_multiple_distincts_with_non_distinct(data_gen, conf): local_conf = copy_and_update(conf, {'spark.sql.legacy.allowParameterlessCount': 'true'}) @@ -982,7 +982,7 @@ def test_hash_query_multiple_distincts_with_non_distinct(data_gen, conf): @approximate_float @ignore_order @incompat -@pytest.mark.parametrize('data_gen', _init_list_with_nans, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_query_max_with_multiple_distincts(data_gen, conf): local_conf = copy_and_update(conf, {'spark.sql.legacy.allowParameterlessCount': 'true'}) @@ -996,7 +996,7 @@ def test_hash_query_max_with_multiple_distincts(data_gen, conf): conf=local_conf) @ignore_order -@pytest.mark.parametrize('data_gen', _init_list_with_nans, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_count_with_filter(data_gen, conf): assert_gpu_and_cpu_are_equal_collect( @@ -1008,7 +1008,7 @@ def test_hash_count_with_filter(data_gen, conf): @approximate_float @ignore_order @incompat -@pytest.mark.parametrize('data_gen', _init_list_with_nans + [_grpkey_short_mid_decimals, _grpkey_short_big_decimals], ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list + [_grpkey_short_mid_decimals, _grpkey_short_big_decimals], ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_hash_multiple_filters(data_gen, conf): assert_gpu_and_cpu_are_equal_sql( @@ -1765,7 +1765,7 @@ def do_it(spark): @ignore_order(local=True) @approximate_float @incompat -@pytest.mark.parametrize('data_gen', _init_list_with_nans_with_decimals, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list_with_decimals, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) def test_std_variance(data_gen, conf): local_conf = copy_and_update(conf, { @@ -1831,7 +1831,7 @@ def test_std_variance_nulls(data_gen, conf, ansi_enabled): 'StddevPop', 'StddevSamp', 'VariancePop', 'VarianceSamp', 'SortArray', 'Alias', 'Literal', 'Count', 'AggregateExpression', 'ProjectExec') -@pytest.mark.parametrize('data_gen', _init_list_with_nans, ids=idfn) +@pytest.mark.parametrize('data_gen', _init_list, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) 
@pytest.mark.parametrize('replace_mode', _replace_modes_non_distinct, ids=idfn) @pytest.mark.parametrize('aqe_enabled', ['false', 'true'], ids=idfn) From 57c21d31a4aeb0406c9c7f770b8254a51eac37ae Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Thu, 28 Sep 2023 11:36:30 -0700 Subject: [PATCH 6/8] added missing gens to init_list --- integration_tests/src/main/python/hash_aggregate_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py index f7db575ce69..ee836713bf3 100644 --- a/integration_tests/src/main/python/hash_aggregate_test.py +++ b/integration_tests/src/main/python/hash_aggregate_test.py @@ -175,6 +175,8 @@ _grpkey_dbls_with_nulls, _grpkey_floats_with_nulls, _grpkey_strings_with_nulls, + _grpkey_strings_with_extra_nulls, + _grpkey_nulls, _grpkey_floats_with_nulls_and_nans] # grouping decimals with nulls From 6dcd20d12563f81c9dfc3b879e13d1453b530801 Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Thu, 28 Sep 2023 17:25:15 -0700 Subject: [PATCH 7/8] addressed review comments --- .../src/main/python/hash_aggregate_test.py | 28 +------------------ 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py index ee836713bf3..4f58278360c 100644 --- a/integration_tests/src/main/python/hash_aggregate_test.py +++ b/integration_tests/src/main/python/hash_aggregate_test.py @@ -477,19 +477,6 @@ def test_hash_grpby_pivot(data_gen, conf): .agg(f.sum('c')), conf = conf) -@approximate_float -@ignore_order(local=True) -@incompat -@pytest.mark.parametrize('data_gen', _init_list, ids=idfn) -@pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) -def test_hash_grpby_pivot_with_nans(data_gen, conf): - assert_gpu_and_cpu_are_equal_collect( - lambda spark: gen_df(spark, data_gen, length=100) - .groupby('a') - .pivot('b') - .agg(f.sum('c')), - conf=conf) - @approximate_float @ignore_order(local=True) @incompat @@ -508,20 +495,7 @@ def test_hash_multiple_grpby_pivot(data_gen, conf): @incompat @pytest.mark.parametrize('data_gen', _init_list, ids=idfn) @pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) -def test_hash_reduction_pivot_with_nans(data_gen, conf): - assert_gpu_and_cpu_are_equal_collect( - lambda spark: gen_df(spark, data_gen, length=100) - .groupby() - .pivot('b') - .agg(f.sum('c')), - conf=conf) - -@approximate_float -@ignore_order(local=True) -@incompat -@pytest.mark.parametrize('data_gen', _init_list, ids=idfn) -@pytest.mark.parametrize('conf', get_params(_confs, params_markers_for_confs), ids=idfn) -def test_hash_reduction_pivot_with_nans(data_gen, conf): +def test_hash_reduction_pivot(data_gen, conf): assert_gpu_and_cpu_are_equal_collect( lambda spark: gen_df(spark, data_gen, length=100) .groupby() From 3c2d0f411ab1e73cce78f47dd7a1e5d96e95c17d Mon Sep 17 00:00:00 2001 From: Raza Jafri Date: Thu, 28 Sep 2023 17:32:03 -0700 Subject: [PATCH 8/8] Signing off Signed-off-by: Raza Jafri