From 4e0c16ee1a86391e00f48fe430f0bb17c2466180 Mon Sep 17 00:00:00 2001
From: Jim Brennan <jimb@nvidia.com>
Date: Thu, 4 Jan 2024 08:26:46 -0600
Subject: [PATCH] fix test_hash_agg_with_nan_keys floating point sum failure
 (#10148)

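Switch the 'b' column of _grpkey_doubles_with_nan_zero_grouping_keys from
FloatGen to IntegerGen and drop the seed=0 datagen override that referenced
https://github.com/NVIDIA/spark-rapids/issues/10026.
test_count_distinct_with_nan_floats is re-parametrized to use
_grpkey_floats_with_nulls_and_nans instead of the modified generator.

Signed-off-by: Jim Brennan <jimb@nvidia.com>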
---
 integration_tests/src/main/python/hash_aggregate_test.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/integration_tests/src/main/python/hash_aggregate_test.py b/integration_tests/src/main/python/hash_aggregate_test.py
index 3fe7bae2a28..e5835a998b8 100644
--- a/integration_tests/src/main/python/hash_aggregate_test.py
+++ b/integration_tests/src/main/python/hash_aggregate_test.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -159,7 +159,7 @@
 
 _grpkey_doubles_with_nan_zero_grouping_keys = [
     ('a', RepeatSeqGen(DoubleGen(nullable=(True, 10.0), special_cases=_nan_zero_double_special_cases), length=50)),
-    ('b', FloatGen(nullable=(True, 10.0))),
+    ('b', IntegerGen(nullable=(True, 10.0))),
     ('c', LongGen())]
 
 # Schema for xfail cases
@@ -1154,7 +1154,6 @@ def test_hash_multiple_filters(data_gen, conf):
         'min(a), max(b) filter (where c > 250) from hash_agg_table group by a',
         conf)
 
-@datagen_overrides(seed=0, reason='https://github.com/NVIDIA/spark-rapids/issues/10026')
 @approximate_float
 @ignore_order
 @pytest.mark.parametrize('data_gen', [_grpkey_floats_with_nan_zero_grouping_keys,
@@ -1222,7 +1221,7 @@ def test_hash_agg_with_struct_of_array_fallback(data_gen):
 
 @approximate_float
 @ignore_order
-@pytest.mark.parametrize('data_gen', [ _grpkey_doubles_with_nan_zero_grouping_keys], ids=idfn)
+@pytest.mark.parametrize('data_gen', [ _grpkey_floats_with_nulls_and_nans ], ids=idfn)
 def test_count_distinct_with_nan_floats(data_gen):
     assert_gpu_and_cpu_are_equal_sql(
         lambda spark : gen_df(spark, data_gen, length=1024),