Address missed review comments

Signed-off-by: Kuhu Shukla <[email protected]>
NVIDIA · Dec 6, 2024 · 2e2202b · 2e2202b
1 parent 0ced895
commit 2e2202b
Show file tree

Hide file tree

Showing 5 changed files with 20 additions and 14 deletions.
diff --git a/integration_tests/src/main/python/datasourcev2_write_test.py b/integration_tests/src/main/python/datasourcev2_write_test.py
@@ -33,7 +33,7 @@
 @allow_non_gpu(*non_utc_allow)
 def test_write_hive_bucketed_table(spark_tmp_table_factory, file_format):
     num_rows = 2048
-    # Use every type except boolean , see https://github.com/NVIDIA/spark-rapids/issues/11762 and
+    # Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and
     # https://github.com/rapidsai/cudf/issues/6763 .
     # Once the first issue is fixed, add back boolean_gen
     def gen_table(spark):

diff --git a/integration_tests/src/main/python/hive_write_test.py b/integration_tests/src/main/python/hive_write_test.py
@@ -29,7 +29,7 @@ def _restricted_timestamp(nullable=True):
                         end=datetime(2262, 4, 11, tzinfo=timezone.utc),
                         nullable=nullable)
 
-# Use every type except boolean , see https://github.com/NVIDIA/spark-rapids/issues/11762 and
+# Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and
 # https://github.com/rapidsai/cudf/issues/6763 .
 # Once the first issue is fixed, add back boolean_gen
 _basic_gens = [byte_gen, short_gen, int_gen, long_gen, float_gen, double_gen,
@@ -48,7 +48,7 @@ def _restricted_timestamp(nullable=True):
     ArrayGen(ArrayGen(string_gen, max_length=10), max_length=10),
     ArrayGen(StructGen([['child0', byte_gen], ['child1', string_gen], ['child2', float_gen]]))]
 
-# Use every type except boolean , see https://github.com/NVIDIA/spark-rapids/issues/11762 and
+# Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and
 # https://github.com/rapidsai/cudf/issues/6763 .
 # Once the first issue is fixed, add back boolean_gen
 _map_gens = [simple_string_to_string_map_gen] + [MapGen(f(nullable=False), f()) for f in [

diff --git a/integration_tests/src/main/python/orc_test.py b/integration_tests/src/main/python/orc_test.py
@@ -112,7 +112,7 @@ def test_basic_read(std_input_path, name, read_func, v1_enabled_list, orc_impl,
 #E                   	at org.apache.orc.TypeDescription.parseInt(TypeDescription.java:244)
 #E                   	at org.apache.orc.TypeDescription.parseType(TypeDescription.java:362)
 # ...
-# Use every type except boolean , see https://github.com/NVIDIA/spark-rapids/issues/11762 and
+# Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and
 # https://github.com/rapidsai/cudf/issues/6763 .
 # Once the first issue is fixed, add back boolean_gen
 orc_basic_gens = [byte_gen, short_gen, int_gen, long_gen, float_gen, double_gen,
@@ -204,7 +204,7 @@ def test_read_round_trip(spark_tmp_path, orc_gens, read_func, reader_confs, v1_e
             read_func(data_path),
             conf=all_confs)
 
-# Use every type except boolean , see https://github.com/NVIDIA/spark-rapids/issues/11762 and
+# Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and
 # https://github.com/rapidsai/cudf/issues/6763 .
 # Once the first issue is fixed, add back boolean_gen
 orc_pred_push_gens = [
@@ -283,7 +283,7 @@ def test_compress_read_round_trip(spark_tmp_path, compress, v1_enabled_list, rea
 def test_simple_partitioned_read(spark_tmp_path, v1_enabled_list, reader_confs):
     # Once https://github.com/NVIDIA/spark-rapids/issues/131 is fixed
     # we should go with a more standard set of generators
-    # Use every type except boolean , see https://github.com/NVIDIA/spark-rapids/issues/11762 and
+    # Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and
     # https://github.com/rapidsai/cudf/issues/6763 .
     # Once the first issue is fixed, add back boolean_gen
     orc_gens = [byte_gen, short_gen, int_gen, long_gen, float_gen, double_gen,
@@ -353,7 +353,7 @@ def test_partitioned_read_just_partitions(spark_tmp_path, v1_enabled_list, reade
 def test_merge_schema_read(spark_tmp_path, v1_enabled_list, reader_confs):
     # Once https://github.com/NVIDIA/spark-rapids/issues/131 is fixed
     # we should go with a more standard set of generators
-    # Use every type except boolean , see https://github.com/NVIDIA/spark-rapids/issues/11762 and
+    # Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and
     # https://github.com/rapidsai/cudf/issues/6763 .
     # Once the first issue is fixed, add back boolean_gen
     orc_gens = [byte_gen, short_gen, int_gen, long_gen, float_gen, double_gen,
@@ -837,7 +837,7 @@ def test_read_round_trip_for_multithreaded_combining(spark_tmp_path, gens, keep_
 @pytest.mark.parametrize('keep_order', [True, pytest.param(False, marks=pytest.mark.ignore_order(local=True))])
 @allow_non_gpu(*non_utc_allow_orc_scan)
 def test_simple_partitioned_read_for_multithreaded_combining(spark_tmp_path, keep_order):
-    # Use every type except boolean , see https://github.com/NVIDIA/spark-rapids/issues/11762 and
+    # Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and
     # https://github.com/rapidsai/cudf/issues/6763 .
     # Once the first issue is fixed, add back boolean_gen
     orc_gens = [byte_gen, short_gen, int_gen, long_gen, float_gen, double_gen,
@@ -942,6 +942,9 @@ def test_orc_column_name_with_dots(spark_tmp_path, reader_confs):
                 ("f.g", int_gen),
                 ("h", string_gen)])),
             ("i.j", long_gen)])),
+        # Use int_gen instead of boolean_gen for column k, see https://github.com/NVIDIA/spark-rapids/issues/11762 and
+        # https://github.com/rapidsai/cudf/issues/6763 .
+        # Once the first issue is fixed, switch column k back to boolean_gen
         ("k", int_gen)]
     with_cpu_session(lambda spark: gen_df(spark, gens).write.orc(data_path))
     assert_gpu_and_cpu_are_equal_collect(lambda spark: reader(spark), conf=all_confs)
@@ -960,7 +963,7 @@ def test_orc_with_null_column(spark_tmp_path, reader_confs):
     def gen_null_df(spark):
         return spark.createDataFrame(
             [(None, None, None, None, None)],
-            # Use every type except boolean , see https://github.com/NVIDIA/spark-rapids/issues/11762 and
+            # Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and
             # https://github.com/rapidsai/cudf/issues/6763 .
             # Once the first issue is fixed, add back boolean_gen
             "c1 int, c2 long, c3 float, c4 double, c5 int")
@@ -984,7 +987,7 @@ def test_orc_with_null_column_with_1m_rows(spark_tmp_path, reader_confs):
     def gen_null_df(spark):
         return spark.createDataFrame(
             data,
-            # Use every type except boolean , see https://github.com/NVIDIA/spark-rapids/issues/11762 and
+            # Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and
             # https://github.com/rapidsai/cudf/issues/6763 .
             # Once the first issue is fixed, add back boolean_gen
             "c1 int, c2 long, c3 float, c4 double, c5 int")

diff --git a/integration_tests/src/main/python/orc_write_test.py b/integration_tests/src/main/python/orc_write_test.py
@@ -24,7 +24,7 @@
 from pyspark.sql.types import *
 
 pytestmark = pytest.mark.nightly_resource_consuming_test
-# Use every type except boolean , see https://github.com/NVIDIA/spark-rapids/issues/11762 and
+# Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and
 # https://github.com/rapidsai/cudf/issues/6763 .
 # Once the first issue is fixed, add back boolean_gen.
 orc_write_basic_gens = [byte_gen, short_gen, int_gen, long_gen, float_gen, double_gen,
@@ -65,7 +65,7 @@
     ArrayGen(ArrayGen(short_gen, max_length=10), max_length=10),
     ArrayGen(ArrayGen(string_gen, max_length=10), max_length=10),
     ArrayGen(StructGen([['child0', byte_gen], ['child1', string_gen], ['child2', float_gen]]))]
-# Use every type except boolean , see https://github.com/NVIDIA/spark-rapids/issues/11762 and
+# Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and
 # https://github.com/rapidsai/cudf/issues/6763 .
 # Once the first issue is fixed, add back boolean_gen.
 orc_write_basic_map_gens = [simple_string_to_string_map_gen] + [MapGen(f(nullable=False), f()) for f in [
@@ -377,6 +377,9 @@ def test_orc_write_column_name_with_dots(spark_tmp_path):
                 ("f.g", int_gen),
                 ("h", string_gen)])),
             ("i.j", long_gen)])),
+        # Use int_gen instead of boolean_gen for column k, see https://github.com/NVIDIA/spark-rapids/issues/11762 and
+        # https://github.com/rapidsai/cudf/issues/6763 .
+        # Once the first issue is fixed, switch column k back to boolean_gen
         ("k", int_gen)]
     assert_gpu_and_cpu_writes_are_equal_collect(
         lambda spark, path:  gen_df(spark, gens).coalesce(1).write.orc(path),

diff --git a/integration_tests/src/main/python/schema_evolution_test.py b/integration_tests/src/main/python/schema_evolution_test.py
@@ -34,7 +34,7 @@
 
 # List of additional column data generators to use when adding columns
 _additional_gens = [
-    # Use every type except boolean , see https://github.com/NVIDIA/spark-rapids/issues/11762 and
+    # Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and
     # https://github.com/rapidsai/cudf/issues/6763 .
     # Once the first issue is fixed, add back boolean_gen
     byte_gen,
@@ -51,7 +51,7 @@
     # simple_string_to_string_map_gen),
     ArrayGen(_custom_date_gen),
     struct_gen_decimal128,
-    # Use every type except boolean , see https://github.com/NVIDIA/spark-rapids/issues/11762 and
+    # Use every type except boolean, see https://github.com/NVIDIA/spark-rapids/issues/11762 and
     # https://github.com/rapidsai/cudf/issues/6763 .
     # Once the first issue is fixed, change c1 back from int_gen to boolean_gen
     StructGen([("c0", ArrayGen(long_gen)), ("c1", int_gen)]),