diff --git a/integration_tests/src/main/python/datasourcev2_write_test.py b/integration_tests/src/main/python/datasourcev2_write_test.py index 88b7fd43b20..5ea98ae43c3 100644 --- a/integration_tests/src/main/python/datasourcev2_write_test.py +++ b/integration_tests/src/main/python/datasourcev2_write_test.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,36 +11,78 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import pytest -from asserts import assert_gpu_fallback_collect -from data_gen import * +from asserts import assert_gpu_fallback_collect, assert_equal_with_local_sort +from data_gen import gen_df from marks import * -from pyspark.sql.types import * -from spark_session import is_hive_available, is_spark_330_or_later, with_cpu_session +from spark_session import is_hive_available, is_spark_330_or_later, with_cpu_session, with_gpu_session +from hive_parquet_write_test import _hive_bucket_gens, _hive_array_gens, _hive_struct_gens +from hive_write_test import _restricted_timestamp +from hive_parquet_write_test import read_single_bucket + +_hive_write_conf = { + "hive.enforce.bucketing": "true", + "hive.exec.dynamic.partition": "true", + "hive.exec.dynamic.partition.mode": "nonstrict"} + + +@pytest.mark.skipif(not (is_hive_available() and is_spark_330_or_later()), + reason="Must have Hive on Spark 3.3+") +@pytest.mark.parametrize('file_format', ['parquet', 'orc']) +def test_write_hive_bucketed_table(spark_tmp_table_factory, file_format): + def gen_table(spark): + gen_list = [('_c' + str(i), gen) for i, gen in enumerate(_hive_bucket_gens)] + types_sql_str = ','.join('{} {}'.format( + name, gen.data_type.simpleString()) for name, gen in gen_list) + col_names_str = ','.join(name for name, gen in gen_list) + data_table = spark_tmp_table_factory.get() + gen_df(spark, gen_list).createOrReplaceTempView(data_table) + return data_table, types_sql_str, col_names_str + + (input_data, input_schema, input_cols_str) = with_cpu_session(gen_table) + num_buckets = 4 + + def write_hive_table(spark, out_table): + spark.sql( + "create table {0} ({1}) stored as {2} clustered by ({3}) into {4} buckets".format( + out_table, input_schema, file_format, input_cols_str, num_buckets)) + spark.sql( + "insert into {0} select * from {1}".format(out_table, input_data)) + + cpu_table = spark_tmp_table_factory.get() + gpu_table = spark_tmp_table_factory.get() + with_cpu_session(lambda spark: write_hive_table(spark, cpu_table), _hive_write_conf) + with_gpu_session(lambda spark: write_hive_table(spark, gpu_table), _hive_write_conf) + cur_bucket_id = 0 + while cur_bucket_id < num_buckets: + # Verify the result bucket by bucket + ret_cpu = read_single_bucket(cpu_table, cur_bucket_id) + ret_gpu = read_single_bucket(gpu_table, cur_bucket_id) + assert_equal_with_local_sort(ret_cpu, ret_gpu) + cur_bucket_id += 1 + @ignore_order -@allow_non_gpu('DataWritingCommandExec,ExecutedCommandExec,WriteFilesExec') -@pytest.mark.skipif(not (is_hive_available() and is_spark_330_or_later()), reason="Must have Hive on Spark 3.3+") -@pytest.mark.parametrize('fileFormat', ['parquet', 'orc']) -def test_write_hive_bucketed_table_fallback(spark_tmp_table_factory, fileFormat): - """ - fallback because GPU does not support 
Hive hash partition - """ - table = spark_tmp_table_factory.get() +@allow_non_gpu('DataWritingCommandExec,ExecutedCommandExec,SortExec,WriteFilesExec') +@pytest.mark.skipif(not (is_hive_available() and is_spark_330_or_later()), + reason="Must have Hive on Spark 3.3+") +@pytest.mark.parametrize('file_format', ['parquet', 'orc']) +@pytest.mark.parametrize('gen', [_restricted_timestamp()] + _hive_array_gens + _hive_struct_gens) +def test_write_hive_bucketed_unsupported_types_fallback(spark_tmp_table_factory, file_format, gen): + out_table = spark_tmp_table_factory.get() def create_hive_table(spark): - spark.sql("""create table {0} (a bigint, b bigint, c bigint) - stored as {1} - clustered by (b) into 3 buckets""".format(table, fileFormat)) - return None - - conf = {"hive.enforce.bucketing": "true", - "hive.exec.dynamic.partition": "true", - "hive.exec.dynamic.partition.mode": "nonstrict"} - with_cpu_session(create_hive_table, conf = conf) + spark.sql("create table {0} (a {1}) stored as {2} clustered by (a) into 3 buckets".format( + out_table, gen.data_type.simpleString(), file_format)) + data_table = spark_tmp_table_factory.get() + gen_df(spark, [('a', gen)], length=10).createOrReplaceTempView(data_table) + return data_table + input_table = with_cpu_session(create_hive_table, _hive_write_conf) assert_gpu_fallback_collect( - lambda spark: spark.sql("insert into {} values (1, 2, 3)".format(table)), + lambda spark: spark.sql( + "insert into {0} select * from {1}".format(out_table, input_table)), 'DataWritingCommandExec', - conf = conf) + _hive_write_conf) diff --git a/integration_tests/src/main/python/hive_parquet_write_test.py b/integration_tests/src/main/python/hive_parquet_write_test.py index f62439a39af..2899b49f316 100644 --- a/integration_tests/src/main/python/hive_parquet_write_test.py +++ b/integration_tests/src/main/python/hive_parquet_write_test.py @@ -14,20 +14,23 @@ import pytest -from asserts import assert_gpu_and_cpu_sql_writes_are_equal_collect +from asserts import assert_gpu_and_cpu_sql_writes_are_equal_collect, assert_equal_with_local_sort from conftest import is_databricks_runtime from data_gen import * from hive_write_test import _restricted_timestamp from marks import allow_non_gpu, ignore_order -from spark_session import with_cpu_session, is_before_spark_320, is_spark_350_or_later +from spark_session import with_cpu_session, with_gpu_session, is_before_spark_320, is_spark_350_or_later, is_before_spark_330, is_spark_330_or_later # Disable the meta conversion from Hive write to FrameData write in Spark, to test # "GpuInsertIntoHiveTable" for Parquet write. _write_to_hive_conf = {"spark.sql.hive.convertMetastoreParquet": False} -_hive_basic_gens = [ - byte_gen, short_gen, int_gen, long_gen, float_gen, double_gen, string_gen, boolean_gen, - DateGen(start=date(1590, 1, 1)), _restricted_timestamp(), +_hive_bucket_gens = [ + boolean_gen, byte_gen, short_gen, int_gen, long_gen, string_gen, float_gen, double_gen, + DateGen(start=date(1590, 1, 1))] + +_hive_basic_gens = _hive_bucket_gens + [ + _restricted_timestamp(), DecimalGen(precision=19, scale=1, nullable=True), DecimalGen(precision=23, scale=5, nullable=True), DecimalGen(precision=36, scale=3, nullable=True)] @@ -58,6 +61,20 @@ fallback_nodes = ['ProjectExec'] if is_databricks_runtime() or is_spark_350_or_later() else [] +def read_single_bucket(table, bucket_id): + # Bucket Id string format: f"_$id%05d" + ".c$fileCounter%03d". + # fileCounter is always 0 in this test. For example '_00002.c000' is for + # bucket id being 2. 
+    # We leverage this bucket segment in the file path to filter rows belonging to a bucket.
+    bucket_segment = '_' + "{}".format(bucket_id).rjust(5, '0') + '.c000'
+    return with_cpu_session(
+        lambda spark: spark.sql("select * from {}".format(table))
+            .withColumn('file_name', f.input_file_name())
+            .filter(f.col('file_name').contains(bucket_segment))
+            .drop('file_name')  # need to drop the file_name column for comparison.
+            .collect())
+
+
 @allow_non_gpu(*(non_utc_allow + fallback_nodes))
 @ignore_order(local=True)
 @pytest.mark.parametrize("is_ctas", [True, False], ids=['CTAS', 'CTTW'])
@@ -174,3 +191,81 @@ def write_to_hive_sql(spark, output_table):
         spark_tmp_table_factory,
         write_to_hive_sql,
         _write_to_hive_conf)
+
+
+@pytest.mark.skipif(is_before_spark_330(),
+                    reason="InsertIntoHiveTable supports bucketed write since Spark 330")
+def test_insert_hive_bucketed_table(spark_tmp_table_factory):
+    def gen_table(spark):
+        gen_list = [('_c' + str(i), gen) for i, gen in enumerate(_hive_bucket_gens)]
+        types_sql_str = ','.join('{} {}'.format(
+            name, gen.data_type.simpleString()) for name, gen in gen_list)
+        col_names_str = ','.join(name for name, gen in gen_list)
+        data_table = spark_tmp_table_factory.get()
+        gen_df(spark, gen_list).createOrReplaceTempView(data_table)
+        return data_table, types_sql_str, col_names_str
+
+    (input_data, input_schema, input_cols_str) = with_cpu_session(gen_table)
+    num_buckets = 4
+
+    def insert_hive_table(spark, out_table):
+        spark.sql(
+            "CREATE TABLE {} ({}) STORED AS PARQUET CLUSTERED BY ({}) INTO {} BUCKETS".format(
+                out_table, input_schema, input_cols_str, num_buckets))
+        spark.sql(
+            "INSERT OVERWRITE {} SELECT * FROM {}".format(out_table, input_data))
+
+    cpu_table = spark_tmp_table_factory.get()
+    gpu_table = spark_tmp_table_factory.get()
+    with_cpu_session(lambda spark: insert_hive_table(spark, cpu_table), _write_to_hive_conf)
+    with_gpu_session(lambda spark: insert_hive_table(spark, gpu_table), _write_to_hive_conf)
+    for cur_bucket_id in range(num_buckets):
+        # Verify the result bucket by bucket
+        ret_cpu = read_single_bucket(cpu_table, cur_bucket_id)
+        ret_gpu = read_single_bucket(gpu_table, cur_bucket_id)
+        assert_equal_with_local_sort(ret_cpu, ret_gpu)
+
+
+@pytest.mark.skipif(is_spark_330_or_later(),
+                    reason="InsertIntoHiveTable supports bucketed write since Spark 330")
+@pytest.mark.parametrize("hive_hash", [True, False])
+def test_insert_hive_bucketed_table_before_330(spark_tmp_table_factory, hive_hash):
+    num_buckets = 4
+
+    def insert_hive_table(spark, out_table):
+        data_table = spark_tmp_table_factory.get()
+        two_col_df(spark, int_gen, long_gen).createOrReplaceTempView(data_table)
+        spark.sql(
+            """CREATE TABLE {} (a int, b long) STORED AS PARQUET
+               CLUSTERED BY (a) INTO {} BUCKETS""".format(out_table, num_buckets))
+        spark.sql(
+            "INSERT OVERWRITE {} SELECT * FROM {}".format(out_table, data_table))
+
+    all_confs = copy_and_update(_write_to_hive_conf, {
+        "hive.enforce.bucketing": False,  # allow the write with bucket spec
+        "hive.enforce.sorting": False,  # allow the write with bucket spec
+        "spark.rapids.sql.format.write.forceHiveHashForBucketing": hive_hash
+    })
+    cpu_table = spark_tmp_table_factory.get()
+    gpu_table = spark_tmp_table_factory.get()
+    with_cpu_session(lambda spark: insert_hive_table(spark, cpu_table), all_confs)
+    with_gpu_session(lambda spark: insert_hive_table(spark, gpu_table), all_confs)
+
+    all_cpu_rows = with_cpu_session(
+        lambda spark: spark.sql("select * from {}".format(cpu_table)).collect())
+    all_gpu_rows = with_cpu_session(
+        lambda spark: spark.sql("select * from {}".format(gpu_table)).collect())
+    assert_equal_with_local_sort(all_cpu_rows, all_gpu_rows)
+
+    for cur_bucket_id in range(num_buckets):
+        ret_cpu = read_single_bucket(cpu_table, cur_bucket_id)
+        ret_gpu = read_single_bucket(gpu_table, cur_bucket_id)
+        if hive_hash:
+            # GPU will write a correctly bucketed table, but CPU will not, because
+            # InsertIntoHiveTable supports bucketed write only since Spark 330.
+            # So the GPU intentionally behaves differently from vanilla Spark here.
+            assert len(ret_gpu) > 0 and len(ret_cpu) == 0
+        else:
+            # Both GPU and CPU write the data, but without any actual bucketing.
+            assert len(ret_gpu) == 0 and len(ret_cpu) == 0
+
diff --git a/integration_tests/src/main/python/hive_write_test.py b/integration_tests/src/main/python/hive_write_test.py
index ae7052dffd7..945cc4806fb 100644
--- a/integration_tests/src/main/python/hive_write_test.py
+++ b/integration_tests/src/main/python/hive_write_test.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -119,39 +119,21 @@ def test_optimized_hive_ctas_options_fallback(gens, storage_with_opts, spark_tmp
             spark_tmp_table_factory.get(), opts_string, storage, data_table)),
         fallback_class)

-@allow_non_gpu('DataWritingCommandExec,ExecutedCommandExec,WriteFilesExec')
-@pytest.mark.skipif(not (is_hive_available() and is_spark_33X() and not is_databricks122_or_later()),
+@ignore_order
+@pytest.mark.skipif(not (is_hive_available() and is_spark_330_or_later() and not is_databricks122_or_later()),
                     reason="Requires Hive and Spark 3.3.X to write bucketed Hive tables")
-@pytest.mark.parametrize("gens", [_basic_gens], ids=idfn)
 @pytest.mark.parametrize("storage", ["PARQUET", "ORC"], ids=idfn)
-def test_optimized_hive_bucketed_fallback_33X(gens, storage, spark_tmp_table_factory):
+def test_optimized_hive_ctas_bucketed_table(storage, spark_tmp_table_factory):
     in_table = spark_tmp_table_factory.get()
-    with_cpu_session(lambda spark: three_col_df(spark, int_gen, int_gen, int_gen).createOrReplaceTempView(in_table))
-    assert_gpu_fallback_collect(
-        lambda spark: spark.sql(
-            """CREATE TABLE {} STORED AS {}
-            CLUSTERED BY (b) INTO 3 BUCKETS
-            AS SELECT * FROM {}""".format(spark_tmp_table_factory.get(), storage, in_table)),
-        "DataWritingCommandExec")
-
-# Since Spark 3.4.0, the internal "SortExec" will be pulled out by default
-# from the FileFormatWriter. Then it is visible in the planning stage.
-@allow_non_gpu("DataWritingCommandExec", "SortExec", "WriteFilesExec") -@pytest.mark.skipif(not (is_hive_available() and (is_spark_340_or_later() or is_databricks122_or_later())), - reason="Requires Hive and Spark 3.4+ to write bucketed Hive tables with SortExec pulled out") -@pytest.mark.parametrize("gens", [_basic_gens], ids=idfn) -@pytest.mark.parametrize("storage", ["PARQUET", "ORC"], ids=idfn) -@pytest.mark.parametrize("planned_write", [True, False], ids=idfn) -def test_optimized_hive_bucketed_fallback(gens, storage, planned_write, spark_tmp_table_factory): - in_table = spark_tmp_table_factory.get() - with_cpu_session(lambda spark: three_col_df(spark, int_gen, int_gen, int_gen).createOrReplaceTempView(in_table)) - assert_gpu_fallback_collect( - lambda spark: spark.sql( - """CREATE TABLE {} STORED AS {} - CLUSTERED BY (b) INTO 3 BUCKETS - AS SELECT * FROM {}""".format(spark_tmp_table_factory.get(), storage, in_table)), - "ExecutedCommandExec", - {"spark.sql.optimizer.plannedWrite.enabled": planned_write}) + # Supported types of Hive hash are all checked in datasourcev2_write_test, so here just + # verify the basic functionality by only the int_gen. + with_cpu_session(lambda spark: three_col_df( + spark, int_gen, int_gen, int_gen).createOrReplaceTempView(in_table)) + assert_gpu_and_cpu_sql_writes_are_equal_collect( + spark_tmp_table_factory, + lambda spark, out_table: """CREATE TABLE {} STORED AS {} + CLUSTERED BY (b) INTO 3 BUCKETS AS SELECT * FROM {}""".format( + out_table, storage, in_table)) def test_hive_copy_ints_to_long(spark_tmp_table_factory): do_hive_copy(spark_tmp_table_factory, int_gen, "INT", "BIGINT") diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala index ee402948088..2dc20a599dc 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOverrides.scala @@ -324,7 +324,8 @@ final class InsertIntoHadoopFsRelationCommandMeta( override def tagSelfForGpuInternal(): Unit = { if (GpuBucketingUtils.isHiveHashBucketing(cmd.options)) { - GpuBucketingUtils.tagForHiveBucketingWrite(this, cmd.bucketSpec, cmd.outputColumns, false) + GpuBucketingUtils.tagForHiveBucketingWrite(this, cmd.bucketSpec, cmd.outputColumns, + conf.isForceHiveHashForBucketedWrite) } else { BucketIdMetaUtils.tagForBucketingWrite(this, cmd.bucketSpec, cmd.outputColumns) } @@ -3201,6 +3202,16 @@ object GpuOverrides extends Logging { def convertToGpu(): GpuExpression = GpuXxHash64(childExprs.map(_.convertToGpu()), a.seed) }), + expr[HiveHash]( + "hive hash operator", + ExprChecks.projectOnly(TypeSig.INT, TypeSig.INT, + repeatingParamCheck = Some(RepeatingParamCheck("input", + TypeSig.commonCudfTypes + TypeSig.NULL - TypeSig.TIMESTAMP, + TypeSig.all))), + (a, conf, p, r) => new ExprMeta[HiveHash](a, conf, p, r) { + def convertToGpu(): GpuExpression = + GpuHiveHash(childExprs.map(_.convertToGpu())) + }), expr[Contains]( "Contains", ExprChecks.binaryProject(TypeSig.BOOLEAN, TypeSig.BOOLEAN, diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala index 5cb527e33cd..c7f9780ad6b 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/RapidsConf.scala @@ -1859,6 +1859,15 @@ val SHUFFLE_COMPRESSION_LZ4_CHUNK_SIZE = conf("spark.rapids.shuffle.compression. 
    .bytesConf(ByteUnit.BYTE)
    .createWithDefault(64 * 1024)

+  val FORCE_HIVE_HASH_FOR_BUCKETED_WRITE =
+    conf("spark.rapids.sql.format.write.forceHiveHashForBucketing")
+      .doc("Hive write commands before Spark 330 use Murmur3Hash for bucketed write. " +
+        "When enabled, HiveHash will always be used for this instead of Murmur3. This is " +
+        "used to align with some customized Spark binaries before 330.")
+      .internal()
+      .booleanConf
+      .createWithDefault(false)
+
 val SHUFFLE_MULTITHREADED_MAX_BYTES_IN_FLIGHT =
   conf("spark.rapids.shuffle.multiThreaded.maxBytesInFlight")
     .doc(
@@ -3146,6 +3155,8 @@ class RapidsConf(conf: Map[String, String]) extends Logging {

   lazy val isFastSampleEnabled: Boolean = get(ENABLE_FAST_SAMPLE)

+  lazy val isForceHiveHashForBucketedWrite: Boolean = get(FORCE_HIVE_HASH_FOR_BUCKETED_WRITE)
+
   lazy val isDetectDeltaLogQueries: Boolean = get(DETECT_DELTA_LOG_QUERIES)

   lazy val isDetectDeltaCheckpointQueries: Boolean = get(DETECT_DELTA_CHECKPOINT_QUERIES)
diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/GpuHiveFileFormat.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/GpuHiveFileFormat.scala
index 69189b2600c..1a25cf6abc7 100644
--- a/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/GpuHiveFileFormat.scala
+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/GpuHiveFileFormat.scala
@@ -45,7 +45,7 @@ object GpuHiveFileFormat extends Logging {
     val insertCmd = meta.wrapped
     // Bucketing write
     GpuBucketingUtils.tagForHiveBucketingWrite(meta, insertCmd.table.bucketSpec,
-      insertCmd.outputColumns, false)
+      insertCmd.outputColumns, meta.conf.isForceHiveHashForBucketedWrite)

     // Infer the file format from the serde string, similar as what Spark does in
     // RelationConversions for Hive.
diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/GpuSaveAsHiveFile.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/GpuSaveAsHiveFile.scala
index 667ad6dd4f4..08118cc11a0 100644
--- a/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/GpuSaveAsHiveFile.scala
+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/hive/rapids/GpuSaveAsHiveFile.scala
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -42,6 +42,7 @@ private[hive] trait GpuSaveAsHiveFile extends GpuDataWritingCommand with SaveAsH
      hadoopConf: Configuration,
      fileFormat: ColumnarFileFormat,
      outputLocation: String,
+     forceHiveHashForBucketing: Boolean,
      customPartitionLocations: Map[TablePartitionSpec,String] = Map.empty,
      partitionAttributes: Seq[Attribute] = Nil,
      bucketSpec: Option[BucketSpec] = None,
@@ -65,6 +66,7 @@ private[hive] trait GpuSaveAsHiveFile extends GpuDataWritingCommand with SaveAsH
       statsTrackers = Seq(gpuWriteJobStatsTracker(hadoopConf)),
       options = options,
       useStableSort = false, // TODO: Fetch from RapidsConf.
+      forceHiveHashForBucketing = forceHiveHashForBucketing,
       concurrentWriterPartitionFlushSize = 0L // TODO: Fetch from RapidsConf.
     )
   }

diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFileFormatDataWriter.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFileFormatDataWriter.scala
index 939a421e0b9..be88e7a2937 100644
--- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFileFormatDataWriter.scala
+++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuFileFormatDataWriter.scala
@@ -35,7 +35,7 @@ import org.apache.spark.internal.io.FileCommitProtocol
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.catalog.{BucketSpec, ExternalCatalogUtils}
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
-import org.apache.spark.sql.catalyst.expressions.{Ascending, Attribute, AttributeSet, Cast, Concat, Expression, Literal, Murmur3Hash, NullsFirst, ScalaUDF, UnsafeProjection}
+import org.apache.spark.sql.catalyst.expressions.{Ascending, Attribute, AttributeSet, Cast, Concat, Expression, HiveHash, Literal, Murmur3Hash, NullsFirst, ScalaUDF, UnsafeProjection}
 import org.apache.spark.sql.connector.write.DataWriter
 import org.apache.spark.sql.execution.datasources.{BucketingUtils, PartitioningUtils, WriteTaskResult}
 import org.apache.spark.sql.rapids.GpuFileFormatDataWriter._
@@ -945,9 +945,38 @@ object BucketIdMetaUtils {
       val hashMeta = GpuOverrides.wrapExpr(expr, meta.conf, None)
       hashMeta.tagForGpu()
       if(!hashMeta.canThisBeReplaced) {
-        meta.willNotWorkOnGpu(s"Hashing for generating bucket IDs can not run" +
+        meta.willNotWorkOnGpu(s"Murmur3 hashing for generating bucket IDs cannot run" +
           s" on GPU. Details: ${hashMeta.explain(all=false)}")
       }
     }
   }
+
+  def tagForBucketingHiveWrite(meta: RapidsMeta[_, _, _], bucketSpec: Option[BucketSpec],
+      outputColumns: Seq[Attribute]): Unit = {
+    bucketSpec.foreach { bSpec =>
+      // Create a HiveHash expression to leverage the existing type checks in the overrides.
+      val expr = HiveHash(bSpec.bucketColumnNames.map(n => outputColumns.find(_.name == n).get))
+      val hashMeta = GpuOverrides.wrapExpr(expr, meta.conf, None)
+      hashMeta.tagForGpu()
+      if (!hashMeta.canThisBeReplaced) {
+        meta.willNotWorkOnGpu(s"Hive hashing for generating bucket IDs cannot run" +
+          s" on GPU. Details: ${hashMeta.explain(all = false)}")
+      }
+    }
+  }
+
+  def getWriteBucketSpecForHive(
+      bucketCols: Seq[Attribute],
+      numBuckets: Int): GpuWriterBucketSpec = {
+    // Hive bucketed table: use "HiveHash" and bitwise-and as the bucket id expression.
+    // The bitwise-and is used to avoid producing a wrong bucket id when
+    // the hash value is negative.
+    val hashId = GpuBitwiseAnd(GpuHiveHash(bucketCols), GpuLiteral(Int.MaxValue))
+    val bucketIdExpression = GpuPmod(hashId, GpuLiteral(numBuckets))
+
+    // The bucket file name prefix follows the Hive, Presto and Trino convention, so that
+    // Hive bucketed tables written by the plugin can be read by other SQL engines.
+ val fileNamePrefix = (bucketId: Int) => f"$bucketId%05d_0_" + GpuWriterBucketSpec(bucketIdExpression, fileNamePrefix) + } } diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuInsertIntoHadoopFsRelationCommand.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuInsertIntoHadoopFsRelationCommand.scala index ece5ef5acf5..2671323ea8c 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuInsertIntoHadoopFsRelationCommand.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuInsertIntoHadoopFsRelationCommand.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.rapids import java.io.IOException -import com.nvidia.spark.rapids.{ColumnarFileFormat, GpuDataWritingCommand} +import com.nvidia.spark.rapids.{ColumnarFileFormat, GpuDataWritingCommand, RapidsConf} import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.internal.io.FileCommitProtocol @@ -165,6 +165,8 @@ case class GpuInsertIntoHadoopFsRelationCommand( } } } + val forceHiveHashForBucketing = + RapidsConf.FORCE_HIVE_HASH_FOR_BUCKETED_WRITE.get(sparkSession.sessionState.conf) val updatedPartitionPaths = GpuFileFormatWriter.write( @@ -181,6 +183,7 @@ case class GpuInsertIntoHadoopFsRelationCommand( options = options, useStableSort = useStableSort, concurrentWriterPartitionFlushSize = concurrentWriterPartitionFlushSize, + forceHiveHashForBucketing = forceHiveHashForBucketing, numStaticPartitionCols = staticPartitions.size) diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/HashFunctions.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/HashFunctions.scala index 36236b23936..bf568480baf 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/HashFunctions.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/HashFunctions.scala @@ -95,7 +95,6 @@ case class GpuMurmur3Hash(children: Seq[Expression], seed: Int) extends GpuHashE GpuColumnVector.from(GpuMurmur3Hash.compute(batch, children, seed), dataType) } - case class GpuXxHash64(children: Seq[Expression], seed: Long) extends GpuHashExpression { override def dataType: DataType = LongType @@ -108,3 +107,19 @@ case class GpuXxHash64(children: Seq[Expression], seed: Long) extends GpuHashExp } } } + +case class GpuHiveHash(children: Seq[Expression]) extends GpuHashExpression { + override def dataType: DataType = IntegerType + + override def prettyName: String = "hive-hash" + + override def columnarEval(batch: ColumnarBatch): GpuColumnVector = { + withResource(GpuProjectExec.project(batch, children)) { args => + val bases = GpuColumnVector.extractBases(args) + val normalized = bases.safeMap { cv => + HashUtils.normalizeInput(cv).asInstanceOf[ColumnView] + } + GpuColumnVector.from(withResource(normalized)(Hash.hiveHash), dataType) + } + } +} diff --git a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/GpuOptimizedCreateHiveTableAsSelectCommandShims.scala b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/GpuOptimizedCreateHiveTableAsSelectCommandShims.scala index 55d9bc53704..907860d98c1 100644 --- a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/GpuOptimizedCreateHiveTableAsSelectCommandShims.scala +++ b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/GpuOptimizedCreateHiveTableAsSelectCommandShims.scala @@ -185,7 +185,7 @@ final class OptimizedCreateHiveTableAsSelectCommandMeta( } GpuBucketingUtils.tagForHiveBucketingWrite(this, tableDesc.bucketSpec, - cmd.outputColumns, false) + cmd.outputColumns, 
conf.isForceHiveHashForBucketedWrite) val serde = tableDesc.storage.serde.getOrElse("").toLowerCase(Locale.ROOT) if (serde.contains("parquet")) { diff --git a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/spark311/GpuBucketingUtils.scala b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/spark311/GpuBucketingUtils.scala index a604267d1d9..85daf17500c 100644 --- a/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/spark311/GpuBucketingUtils.scala +++ b/sql-plugin/src/main/spark311/scala/com/nvidia/spark/rapids/shims/spark311/GpuBucketingUtils.scala @@ -28,7 +28,7 @@ package com.nvidia.spark.rapids.shims import com.nvidia.spark.rapids.RapidsMeta -import org.apache.spark.sql.catalyst.catalog.BucketSpec +import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTable} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.rapids.{BucketIdMetaUtils, GpuWriterBucketSpec} @@ -43,9 +43,7 @@ object GpuBucketingUtils { val bucketColumns = spec.bucketColumnNames.map(c => dataColumns.find(_.name == c).get) if (forceHiveHash) { // Forcely use HiveHash for Hive write commands for some customized Spark binaries. - // TODO: Cannot support this until we support Hive hash partitioning on the GPU - throw new UnsupportedOperationException("Hive hash partitioning is not supported" + - " on GPU") + BucketIdMetaUtils.getWriteBucketSpecForHive(bucketColumns, spec.numBuckets) } else { // Spark bucketed table: use `HashPartitioning.partitionIdExpression` as bucket id // expression, so that we can guarantee the data distribution is same between shuffle and @@ -67,11 +65,20 @@ object GpuBucketingUtils { def tagForHiveBucketingWrite(meta: RapidsMeta[_, _, _], bucketSpec: Option[BucketSpec], outColumns: Seq[Attribute], forceHiveHash: Boolean): Unit = { if (forceHiveHash) { - bucketSpec.foreach(_ => - meta.willNotWorkOnGpu("Hive Hashing for generating bucket IDs is not supported yet") - ) + BucketIdMetaUtils.tagForBucketingHiveWrite(meta, bucketSpec, outColumns) } else { BucketIdMetaUtils.tagForBucketingWrite(meta, bucketSpec, outColumns) } } + + // Only for GpuInsertIntoHiveTable. The "InsertIntoHiveTable" in normal Spark before 330 + // does not support the bucketed write. But some customized Spark binaries before 330 indeed + // support it. So "forceHiveHash" is introduced to give a chance to enable the bucket write. 
+ def getBucketSpec(table: CatalogTable, forceHiveHash: Boolean): Option[BucketSpec] = { + if (forceHiveHash) { + table.bucketSpec + } else { + None + } + } } diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/GpuCreateHiveTableAsSelectCommand.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/GpuCreateHiveTableAsSelectCommand.scala index acdd53b74ab..7e8cc57eda4 100644 --- a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/GpuCreateHiveTableAsSelectCommand.scala +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/GpuCreateHiveTableAsSelectCommand.scala @@ -62,7 +62,7 @@ final class GpuCreateHiveTableAsSelectCommandMeta(cmd: CreateHiveTableAsSelectCo } GpuBucketingUtils.tagForHiveBucketingWrite(this, tableDesc.bucketSpec, - cmd.outputColumns, false) + cmd.outputColumns, conf.isForceHiveHashForBucketedWrite) val catalog = spark.sessionState.catalog val tableExists = catalog.tableExists(tableDesc.identifier) diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/GpuInsertIntoHiveTable.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/GpuInsertIntoHiveTable.scala index 3f59d6565a5..fcd838e300f 100644 --- a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/GpuInsertIntoHiveTable.scala +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/hive/rapids/shims/GpuInsertIntoHiveTable.scala @@ -210,6 +210,8 @@ case class GpuInsertIntoHiveTable( // column names in order to make `loadDynamicPartitions` work. attr.withName(name.toLowerCase(Locale.ROOT)) } + val forceHiveHashForBucketing = + RapidsConf.FORCE_HIVE_HASH_FOR_BUCKETED_WRITE.get(sparkSession.sessionState.conf) val writtenParts = gpuSaveAsHiveFile( sparkSession = sparkSession, @@ -217,8 +219,9 @@ case class GpuInsertIntoHiveTable( hadoopConf = hadoopConf, fileFormat = fileFormat, outputLocation = tmpLocation.toString, + forceHiveHashForBucketing = forceHiveHashForBucketing, partitionAttributes = partitionAttributes, - bucketSpec = table.bucketSpec, + bucketSpec = GpuBucketingUtils.getBucketSpec(table, forceHiveHashForBucketing), options = GpuBucketingUtils.getOptionsWithHiveBucketWrite(table.bucketSpec)) if (partition.nonEmpty) { diff --git a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/GpuFileFormatWriter.scala b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/GpuFileFormatWriter.scala index 4adbd7b2ef5..596c2ebd8b7 100644 --- a/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/GpuFileFormatWriter.scala +++ b/sql-plugin/src/main/spark311/scala/org/apache/spark/sql/rapids/GpuFileFormatWriter.scala @@ -106,6 +106,7 @@ object GpuFileFormatWriter extends Logging { options: Map[String, String], useStableSort: Boolean, concurrentWriterPartitionFlushSize: Long, + forceHiveHashForBucketing: Boolean = false, numStaticPartitionCols: Int = 0): Set[String] = { require(partitionColumns.size >= numStaticPartitionCols) @@ -137,7 +138,7 @@ object GpuFileFormatWriter extends Logging { } val writerBucketSpec = GpuBucketingUtils.getWriterBucketSpec(bucketSpec, dataColumns, - options, false) + options, forceHiveHashForBucketing) val sortColumns = bucketSpec.toSeq.flatMap { spec => spec.sortColumnNames.map(c => dataColumns.find(_.name == c).get) } diff --git a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/spark330/GpuBucketingUtils.scala 
b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/spark330/GpuBucketingUtils.scala index 0f7c9b4fd62..d743c9cbaa2 100644 --- a/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/spark330/GpuBucketingUtils.scala +++ b/sql-plugin/src/main/spark330/scala/com/nvidia/spark/rapids/shims/spark330/GpuBucketingUtils.scala @@ -37,10 +37,10 @@ package com.nvidia.spark.rapids.shims import com.nvidia.spark.rapids.RapidsMeta -import org.apache.spark.sql.catalyst.catalog.BucketSpec +import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTable} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.execution.datasources.BucketingUtils -import org.apache.spark.sql.rapids.GpuWriterBucketSpec +import org.apache.spark.sql.rapids.{BucketIdMetaUtils, GpuWriterBucketSpec} object GpuBucketingUtils { @@ -54,9 +54,7 @@ object GpuBucketingUtils { val shouldHiveCompatibleWrite = options.getOrElse( BucketingUtils.optionForHiveCompatibleBucketWrite, "false").toBoolean if (shouldHiveCompatibleWrite) { - // TODO: Cannot support this until we support Hive hash partitioning on the GPU - throw new UnsupportedOperationException("Hive hash partitioning is not supported" + - " on GPU") + BucketIdMetaUtils.getWriteBucketSpecForHive(bucketColumns, spec.numBuckets) } else { // Spark bucketed table: use `HashPartitioning.partitionIdExpression` as bucket id // expression, so that we can guarantee the data distribution is same between shuffle and @@ -81,9 +79,11 @@ object GpuBucketingUtils { def tagForHiveBucketingWrite(meta: RapidsMeta[_, _, _], bucketSpec: Option[BucketSpec], outColumns: Seq[Attribute], forceHiveHash: Boolean): Unit = { - bucketSpec.foreach(_ => - // From Spark330, Hive write always uses HiveHash to generate bucket IDs. - meta.willNotWorkOnGpu("Hive Hashing for generating bucket IDs is not supported yet") - ) + // From Spark330, Hive write always uses HiveHash to generate bucket IDs. + BucketIdMetaUtils.tagForBucketingHiveWrite(meta, bucketSpec, outColumns) + } + + def getBucketSpec(table: CatalogTable, forceHiveHash: Boolean): Option[BucketSpec] = { + table.bucketSpec } } diff --git a/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/GpuInsertIntoHiveTable.scala b/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/GpuInsertIntoHiveTable.scala index b3103c3c76e..0b59a86327a 100644 --- a/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/GpuInsertIntoHiveTable.scala +++ b/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/GpuInsertIntoHiveTable.scala @@ -199,6 +199,9 @@ case class GpuInsertIntoHiveTable( // column names in order to make `loadDynamicPartitions` work. 
attr.withName(name.toLowerCase(Locale.ROOT)) } + val forceHiveHashForBucketing = + RapidsConf.FORCE_HIVE_HASH_FOR_BUCKETED_WRITE.get(sparkSession.sessionState.conf) + val writtenParts = gpuSaveAsHiveFile( sparkSession = sparkSession, @@ -206,6 +209,7 @@ case class GpuInsertIntoHiveTable( hadoopConf = hadoopConf, fileFormat = fileFormat, outputLocation = tmpLocation.toString, + forceHiveHashForBucketing = forceHiveHashForBucketing, partitionAttributes = partitionAttributes, bucketSpec = table.bucketSpec, options = GpuBucketingUtils.getOptionsWithHiveBucketWrite(table.bucketSpec)) diff --git a/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/GpuOptimizedCreateHiveTableAsSelectCommandShims.scala b/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/GpuOptimizedCreateHiveTableAsSelectCommandShims.scala index e74bf979af9..4b47e69558c 100644 --- a/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/GpuOptimizedCreateHiveTableAsSelectCommandShims.scala +++ b/sql-plugin/src/main/spark332db/scala/com/nvidia/spark/rapids/shims/GpuOptimizedCreateHiveTableAsSelectCommandShims.scala @@ -199,7 +199,8 @@ final class OptimizedCreateHiveTableAsSelectCommandMeta( val outputColumns = DataWritingCommand.logicalPlanOutputWithNames(cmd.query, cmd.outputColumnNames) - GpuBucketingUtils.tagForHiveBucketingWrite(this, tableDesc.bucketSpec, outputColumns, false) + GpuBucketingUtils.tagForHiveBucketingWrite(this, tableDesc.bucketSpec, outputColumns, + conf.isForceHiveHashForBucketedWrite) val serde = tableDesc.storage.serde.getOrElse("").toLowerCase(Locale.ROOT) if (serde.contains("parquet")) { diff --git a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/GpuFileFormatWriter.scala b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/GpuFileFormatWriter.scala index 874d89353aa..a2634f54da0 100644 --- a/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/GpuFileFormatWriter.scala +++ b/sql-plugin/src/main/spark332db/scala/org/apache/spark/sql/rapids/GpuFileFormatWriter.scala @@ -99,6 +99,7 @@ object GpuFileFormatWriter extends Logging { options: Map[String, String], useStableSort: Boolean, concurrentWriterPartitionFlushSize: Long, + forceHiveHashForBucketing: Boolean = false, numStaticPartitionCols: Int = 0): Set[String] = { require(partitionColumns.size >= numStaticPartitionCols) @@ -120,7 +121,7 @@ object GpuFileFormatWriter extends Logging { val dataColumns = finalOutputSpec.outputColumns.filterNot(partitionSet.contains) val writerBucketSpec = GpuBucketingUtils.getWriterBucketSpec(bucketSpec, dataColumns, - options, false) + options, forceHiveHashForBucketing) val sortColumns = bucketSpec.toSeq.flatMap { spec => spec.sortColumnNames.map(c => dataColumns.find(_.name == c).get) } diff --git a/tests/src/test/scala/com/nvidia/spark/rapids/HiveHashTestSuite.scala b/tests/src/test/scala/com/nvidia/spark/rapids/HiveHashTestSuite.scala new file mode 100644 index 00000000000..080b52a38ef --- /dev/null +++ b/tests/src/test/scala/com/nvidia/spark/rapids/HiveHashTestSuite.scala @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.nvidia.spark.rapids + +import java.lang.{Byte => JByte} + +import ai.rapids.cudf.{ColumnVector => CudfColumnVector} +import com.nvidia.spark.rapids.Arm.{closeOnExcept, withResource} + +import org.apache.spark.sql.catalyst.expressions.{BoundReference, ExprId, HiveHash} +import org.apache.spark.sql.rapids.GpuHiveHash +import org.apache.spark.sql.types._ +import org.apache.spark.sql.vectorized.ColumnarBatch + +class HiveHashTestSuite extends SparkQueryCompareTestSuite { + // All columns should have the same length(now is 15) for multiple columns tests. + def genBoolColumn: GpuColumnVector = GpuColumnVector.from( + CudfColumnVector.fromBoxedBooleans(null, true, null, false, true, true, true, + false, false, null, false, true, null, false, true), + BooleanType) + + def genByteColumn: GpuColumnVector = { + val bytes: Seq[JByte] = Seq[JByte](null, Byte.MaxValue, Byte.MinValue, null, null) ++ + Seq(0, -0, 1, -1, 10, -10, 126, -126, 111, -111).map(b => JByte.valueOf(b.toByte)) + GpuColumnVector.from(CudfColumnVector.fromBoxedBytes(bytes: _*), ByteType) + } + + def genIntColumn: GpuColumnVector = GpuColumnVector.from( + CudfColumnVector.fromBoxedInts(null, Int.MaxValue, Int.MinValue, 0, -0, 1, -1, + null, 100, -100, null, null, 99, -99, null), + IntegerType) + + def genLongColumn: GpuColumnVector = GpuColumnVector.from( + CudfColumnVector.fromBoxedLongs(null, Long.MaxValue, Long.MinValue, 0, -0, 1, -1, + null, 100, -100, null, null, 99, -99, null), + LongType) + + def genStringColumn: GpuColumnVector = GpuColumnVector.from( + CudfColumnVector.fromStrings(null, "", "1", "432", "a\nb", "a", "`@@$*&", " ", "\t", + "dE\"\u0100\t\u0101 \ud720\ud721", "\ud720\ud721\ud720\ud721", "''", null, " ", + "This is a long string (greater than 128 bytes/char string) case to test this " + + "hash function. 
Just want an abnormal case here to see if any error may " + + "happen when doing the hive hashing"), + StringType) + + def genFloatColumn: GpuColumnVector = GpuColumnVector.from( + CudfColumnVector.fromBoxedFloats(null, 0.0f, -0.0f, 99.0f, -99.0f, Float.NaN, + Float.MaxValue, Float.MinValue, Float.MinPositiveValue, Float.NegativeInfinity, + Float.PositiveInfinity, + FLOAT_POSITIVE_NAN_LOWER_RANGE, FLOAT_POSITIVE_NAN_UPPER_RANGE, + FLOAT_NEGATIVE_NAN_LOWER_RANGE, FLOAT_NEGATIVE_NAN_UPPER_RANGE), + FloatType) + + def genDoubleColumn: GpuColumnVector = GpuColumnVector.from( + CudfColumnVector.fromBoxedDoubles(null, 0.0, -0.0, 199.0, -199.0, Double.NaN, + Double.MaxValue, Double.MinValue, Double.MinPositiveValue, Double.NegativeInfinity, + Double.PositiveInfinity, + DOUBLE_POSITIVE_NAN_LOWER_RANGE, DOUBLE_POSITIVE_NAN_UPPER_RANGE, + DOUBLE_NEGATIVE_NAN_LOWER_RANGE, DOUBLE_NEGATIVE_NAN_UPPER_RANGE), + DoubleType) + + def genDateColumn: GpuColumnVector = GpuColumnVector.from( + CudfColumnVector.timestampDaysFromBoxedInts(null, 0, null, 100, -100, 0x12345678, + null, -0x12345678, 171922, 19899, 17766, -0, 16897, null, 18888), + DateType) + + private def testHiveHashOnGpuAndCpuThenClose(cols: GpuColumnVector*): Unit = { + val (numRows, cpuRefs, gpuRefs) = closeOnExcept(cols) { _ => + val rowsNum = cols.head.getRowCount + require(cols.tail.forall(_.getRowCount == rowsNum), + s"All the input columns should have the same length: $rowsNum.") + + val cpuRefs = cols.zipWithIndex.map { case (col, id) => + BoundReference(id, col.dataType(), nullable = true) + } + val gpuRefs = cols.zipWithIndex.map { case (col, id) => + GpuBoundReference(id, col.dataType(), nullable = true)(ExprId(id), + s"col${id}_${col.dataType().simpleString}") + } + (rowsNum.toInt, cpuRefs, gpuRefs) + } + + // GPU run + val gpuRet = closeOnExcept(cols) { _ => + val inputCB = new ColumnarBatch(cols.toArray, numRows) + val hostRet = withResource(GpuHiveHash(gpuRefs).columnarEval(inputCB)) { retCol => + retCol.copyToHost() + } + withResource(hostRet) { _ => + (0 until numRows).map(hostRet.getInt).toArray + } + } + // CPU run + val cpuRet = withResource(cols) { _ => + withResource(new ColumnarBatch(cols.map(_.copyToHost()).toArray, numRows)) { cb => + val hiveHash = HiveHash(cpuRefs) + val it = cb.rowIterator() + (0 until numRows).map(_ => hiveHash.eval(it.next())).toArray + } + } + assertResult(cpuRet)(gpuRet) + } + + test("Test hive hash booleans") { + testHiveHashOnGpuAndCpuThenClose(genBoolColumn) + } + + test("Test hive hash bytes") { + testHiveHashOnGpuAndCpuThenClose(genByteColumn) + } + + test("Test hive hash ints") { + testHiveHashOnGpuAndCpuThenClose(genIntColumn) + } + + test("Test hive hash longs") { + testHiveHashOnGpuAndCpuThenClose(genLongColumn) + } + + test("Test hive hash floats") { + testHiveHashOnGpuAndCpuThenClose(genFloatColumn) + } + + test("Test hive hash doubles") { + testHiveHashOnGpuAndCpuThenClose(genDoubleColumn) + } + + test("Test hive hash dates") { + testHiveHashOnGpuAndCpuThenClose(genDateColumn) + } + + test("Test hive hash strings") { + testHiveHashOnGpuAndCpuThenClose(genStringColumn) + } + + test("Test hive hash mixed {bytes, ints, longs, dates}") { + val cols = closeOnExcept(new Array[GpuColumnVector](4)) { buf => + buf(0) = genByteColumn + buf(1) = genIntColumn + buf(2) = genLongColumn + buf(3) = genDateColumn + buf + } + testHiveHashOnGpuAndCpuThenClose(cols: _*) + } + + test("Test hive hash mixed {booleans, floats, doubles, strings}") { + val cols = closeOnExcept(new Array[GpuColumnVector](4)) { 
buf => + buf(0) = genBoolColumn + buf(1) = genFloatColumn + buf(2) = genDoubleColumn + buf(3) = genStringColumn + buf + } + testHiveHashOnGpuAndCpuThenClose(cols: _*) + } +} diff --git a/tools/generated_files/311/operatorsScore.csv b/tools/generated_files/311/operatorsScore.csv index e3f8d1053c1..c5c2080694c 100644 --- a/tools/generated_files/311/operatorsScore.csv +++ b/tools/generated_files/311/operatorsScore.csv @@ -126,6 +126,7 @@ GreaterThan,4 GreaterThanOrEqual,4 Greatest,4 HiveGenericUDF,4 +HiveHash,4 HiveSimpleUDF,4 Hour,4 Hypot,4 diff --git a/tools/generated_files/311/supportedExprs.csv b/tools/generated_files/311/supportedExprs.csv index 5f57725522f..294651e8902 100644 --- a/tools/generated_files/311/supportedExprs.csv +++ b/tools/generated_files/311/supportedExprs.csv @@ -254,6 +254,8 @@ GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA, GreaterThanOrEqual,S,`>=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA Greatest,S,`greatest`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS Greatest,S,`greatest`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS +HiveHash,S, ,None,project,input,S,S,S,S,S,S,S,S,NS,S,NS,S,NS,NS,NS,NS,NS,NS +HiveHash,S, ,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA Hour,S,`hour`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA Hour,S,`hour`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA Hypot,S,`hypot`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA diff --git a/tools/generated_files/312/operatorsScore.csv b/tools/generated_files/312/operatorsScore.csv index e3f8d1053c1..c5c2080694c 100644 --- a/tools/generated_files/312/operatorsScore.csv +++ b/tools/generated_files/312/operatorsScore.csv @@ -126,6 +126,7 @@ GreaterThan,4 GreaterThanOrEqual,4 Greatest,4 HiveGenericUDF,4 +HiveHash,4 HiveSimpleUDF,4 Hour,4 Hypot,4 diff --git a/tools/generated_files/312/supportedExprs.csv b/tools/generated_files/312/supportedExprs.csv index 5f57725522f..294651e8902 100644 --- a/tools/generated_files/312/supportedExprs.csv +++ b/tools/generated_files/312/supportedExprs.csv @@ -254,6 +254,8 @@ GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA, GreaterThanOrEqual,S,`>=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA Greatest,S,`greatest`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS Greatest,S,`greatest`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS +HiveHash,S, ,None,project,input,S,S,S,S,S,S,S,S,NS,S,NS,S,NS,NS,NS,NS,NS,NS +HiveHash,S, ,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA Hour,S,`hour`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA Hour,S,`hour`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA Hypot,S,`hypot`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA diff --git a/tools/generated_files/313/operatorsScore.csv b/tools/generated_files/313/operatorsScore.csv index e3f8d1053c1..c5c2080694c 100644 --- a/tools/generated_files/313/operatorsScore.csv +++ b/tools/generated_files/313/operatorsScore.csv @@ -126,6 +126,7 @@ GreaterThan,4 GreaterThanOrEqual,4 Greatest,4 HiveGenericUDF,4 +HiveHash,4 HiveSimpleUDF,4 Hour,4 Hypot,4 diff --git a/tools/generated_files/313/supportedExprs.csv b/tools/generated_files/313/supportedExprs.csv index 5f57725522f..294651e8902 100644 --- a/tools/generated_files/313/supportedExprs.csv +++ 
b/tools/generated_files/313/supportedExprs.csv @@ -254,6 +254,8 @@ GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA, GreaterThanOrEqual,S,`>=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA Greatest,S,`greatest`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS Greatest,S,`greatest`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS +HiveHash,S, ,None,project,input,S,S,S,S,S,S,S,S,NS,S,NS,S,NS,NS,NS,NS,NS,NS +HiveHash,S, ,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA Hour,S,`hour`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA Hour,S,`hour`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA Hypot,S,`hypot`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA diff --git a/tools/generated_files/320/operatorsScore.csv b/tools/generated_files/320/operatorsScore.csv index a9606fe77d9..0c7295f95ac 100644 --- a/tools/generated_files/320/operatorsScore.csv +++ b/tools/generated_files/320/operatorsScore.csv @@ -130,6 +130,7 @@ GreaterThan,4 GreaterThanOrEqual,4 Greatest,4 HiveGenericUDF,4 +HiveHash,4 HiveSimpleUDF,4 Hour,4 Hypot,4 diff --git a/tools/generated_files/320/supportedExprs.csv b/tools/generated_files/320/supportedExprs.csv index 937ed7ae569..4c44f3f814e 100644 --- a/tools/generated_files/320/supportedExprs.csv +++ b/tools/generated_files/320/supportedExprs.csv @@ -254,6 +254,8 @@ GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA, GreaterThanOrEqual,S,`>=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA Greatest,S,`greatest`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA Greatest,S,`greatest`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +HiveHash,S, ,None,project,input,S,S,S,S,S,S,S,S,NS,S,NS,S,NS,NS,NS,NS,NS,NS,NS,NS +HiveHash,S, ,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA Hour,S,`hour`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA Hour,S,`hour`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA Hypot,S,`hypot`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA diff --git a/tools/generated_files/321/operatorsScore.csv b/tools/generated_files/321/operatorsScore.csv index a9606fe77d9..0c7295f95ac 100644 --- a/tools/generated_files/321/operatorsScore.csv +++ b/tools/generated_files/321/operatorsScore.csv @@ -130,6 +130,7 @@ GreaterThan,4 GreaterThanOrEqual,4 Greatest,4 HiveGenericUDF,4 +HiveHash,4 HiveSimpleUDF,4 Hour,4 Hypot,4 diff --git a/tools/generated_files/321/supportedExprs.csv b/tools/generated_files/321/supportedExprs.csv index 937ed7ae569..4c44f3f814e 100644 --- a/tools/generated_files/321/supportedExprs.csv +++ b/tools/generated_files/321/supportedExprs.csv @@ -254,6 +254,8 @@ GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA, GreaterThanOrEqual,S,`>=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA Greatest,S,`greatest`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA Greatest,S,`greatest`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA +HiveHash,S, ,None,project,input,S,S,S,S,S,S,S,S,NS,S,NS,S,NS,NS,NS,NS,NS,NS,NS,NS +HiveHash,S, ,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA 
 Hour,S,`hour`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hypot,S,`hypot`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/321cdh/operatorsScore.csv b/tools/generated_files/321cdh/operatorsScore.csv
index a9606fe77d9..0c7295f95ac 100644
--- a/tools/generated_files/321cdh/operatorsScore.csv
+++ b/tools/generated_files/321cdh/operatorsScore.csv
@@ -130,6 +130,7 @@ GreaterThan,4
 GreaterThanOrEqual,4
 Greatest,4
 HiveGenericUDF,4
+HiveHash,4
 HiveSimpleUDF,4
 Hour,4
 Hypot,4
diff --git a/tools/generated_files/321cdh/supportedExprs.csv b/tools/generated_files/321cdh/supportedExprs.csv
index 937ed7ae569..4c44f3f814e 100644
--- a/tools/generated_files/321cdh/supportedExprs.csv
+++ b/tools/generated_files/321cdh/supportedExprs.csv
@@ -254,6 +254,8 @@ GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,
 GreaterThanOrEqual,S,`>=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Greatest,S,`greatest`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Greatest,S,`greatest`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+HiveHash,S, ,None,project,input,S,S,S,S,S,S,S,S,NS,S,NS,S,NS,NS,NS,NS,NS,NS,NS,NS
+HiveHash,S, ,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hypot,S,`hypot`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/322/operatorsScore.csv b/tools/generated_files/322/operatorsScore.csv
index a9606fe77d9..0c7295f95ac 100644
--- a/tools/generated_files/322/operatorsScore.csv
+++ b/tools/generated_files/322/operatorsScore.csv
@@ -130,6 +130,7 @@ GreaterThan,4
 GreaterThanOrEqual,4
 Greatest,4
 HiveGenericUDF,4
+HiveHash,4
 HiveSimpleUDF,4
 Hour,4
 Hypot,4
diff --git a/tools/generated_files/322/supportedExprs.csv b/tools/generated_files/322/supportedExprs.csv
index 937ed7ae569..4c44f3f814e 100644
--- a/tools/generated_files/322/supportedExprs.csv
+++ b/tools/generated_files/322/supportedExprs.csv
@@ -254,6 +254,8 @@ GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,
 GreaterThanOrEqual,S,`>=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Greatest,S,`greatest`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Greatest,S,`greatest`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+HiveHash,S, ,None,project,input,S,S,S,S,S,S,S,S,NS,S,NS,S,NS,NS,NS,NS,NS,NS,NS,NS
+HiveHash,S, ,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hypot,S,`hypot`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/323/operatorsScore.csv b/tools/generated_files/323/operatorsScore.csv
index a9606fe77d9..0c7295f95ac 100644
--- a/tools/generated_files/323/operatorsScore.csv
+++ b/tools/generated_files/323/operatorsScore.csv
@@ -130,6 +130,7 @@ GreaterThan,4
 GreaterThanOrEqual,4
 Greatest,4
 HiveGenericUDF,4
+HiveHash,4
 HiveSimpleUDF,4
 Hour,4
 Hypot,4
diff --git a/tools/generated_files/323/supportedExprs.csv b/tools/generated_files/323/supportedExprs.csv
index 937ed7ae569..4c44f3f814e 100644
--- a/tools/generated_files/323/supportedExprs.csv
+++ b/tools/generated_files/323/supportedExprs.csv
@@ -254,6 +254,8 @@ GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,
 GreaterThanOrEqual,S,`>=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Greatest,S,`greatest`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Greatest,S,`greatest`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+HiveHash,S, ,None,project,input,S,S,S,S,S,S,S,S,NS,S,NS,S,NS,NS,NS,NS,NS,NS,NS,NS
+HiveHash,S, ,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hypot,S,`hypot`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/324/operatorsScore.csv b/tools/generated_files/324/operatorsScore.csv
index a9606fe77d9..0c7295f95ac 100644
--- a/tools/generated_files/324/operatorsScore.csv
+++ b/tools/generated_files/324/operatorsScore.csv
@@ -130,6 +130,7 @@ GreaterThan,4
 GreaterThanOrEqual,4
 Greatest,4
 HiveGenericUDF,4
+HiveHash,4
 HiveSimpleUDF,4
 Hour,4
 Hypot,4
diff --git a/tools/generated_files/324/supportedExprs.csv b/tools/generated_files/324/supportedExprs.csv
index 937ed7ae569..4c44f3f814e 100644
--- a/tools/generated_files/324/supportedExprs.csv
+++ b/tools/generated_files/324/supportedExprs.csv
@@ -254,6 +254,8 @@ GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,
 GreaterThanOrEqual,S,`>=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Greatest,S,`greatest`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Greatest,S,`greatest`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+HiveHash,S, ,None,project,input,S,S,S,S,S,S,S,S,NS,S,NS,S,NS,NS,NS,NS,NS,NS,NS,NS
+HiveHash,S, ,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hypot,S,`hypot`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/330/operatorsScore.csv b/tools/generated_files/330/operatorsScore.csv
index 7de435ebbc7..235432d36e1 100644
--- a/tools/generated_files/330/operatorsScore.csv
+++ b/tools/generated_files/330/operatorsScore.csv
@@ -135,6 +135,7 @@ GreaterThan,4
 GreaterThanOrEqual,4
 Greatest,4
 HiveGenericUDF,4
+HiveHash,4
 HiveSimpleUDF,4
 Hour,4
 Hypot,4
diff --git a/tools/generated_files/330/supportedExprs.csv b/tools/generated_files/330/supportedExprs.csv
index ce504a2ca68..a2e7018c42f 100644
--- a/tools/generated_files/330/supportedExprs.csv
+++ b/tools/generated_files/330/supportedExprs.csv
@@ -263,6 +263,8 @@ GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,
 GreaterThanOrEqual,S,`>=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Greatest,S,`greatest`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Greatest,S,`greatest`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+HiveHash,S, ,None,project,input,S,S,S,S,S,S,S,S,NS,S,NS,S,NS,NS,NS,NS,NS,NS,NS,NS
+HiveHash,S, ,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hypot,S,`hypot`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/330cdh/operatorsScore.csv b/tools/generated_files/330cdh/operatorsScore.csv
index 7de435ebbc7..235432d36e1 100644
--- a/tools/generated_files/330cdh/operatorsScore.csv
+++ b/tools/generated_files/330cdh/operatorsScore.csv
@@ -135,6 +135,7 @@ GreaterThan,4
 GreaterThanOrEqual,4
 Greatest,4
 HiveGenericUDF,4
+HiveHash,4
 HiveSimpleUDF,4
 Hour,4
 Hypot,4
diff --git a/tools/generated_files/330cdh/supportedExprs.csv b/tools/generated_files/330cdh/supportedExprs.csv
index ce504a2ca68..a2e7018c42f 100644
--- a/tools/generated_files/330cdh/supportedExprs.csv
+++ b/tools/generated_files/330cdh/supportedExprs.csv
@@ -263,6 +263,8 @@ GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,
 GreaterThanOrEqual,S,`>=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Greatest,S,`greatest`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Greatest,S,`greatest`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+HiveHash,S, ,None,project,input,S,S,S,S,S,S,S,S,NS,S,NS,S,NS,NS,NS,NS,NS,NS,NS,NS
+HiveHash,S, ,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hypot,S,`hypot`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/331/operatorsScore.csv b/tools/generated_files/331/operatorsScore.csv
index b1168d0fa6b..6b4cd0349c6 100644
--- a/tools/generated_files/331/operatorsScore.csv
+++ b/tools/generated_files/331/operatorsScore.csv
@@ -136,6 +136,7 @@ GreaterThan,4
 GreaterThanOrEqual,4
 Greatest,4
 HiveGenericUDF,4
+HiveHash,4
 HiveSimpleUDF,4
 Hour,4
 Hypot,4
diff --git a/tools/generated_files/331/supportedExprs.csv b/tools/generated_files/331/supportedExprs.csv
index 44a7a8b977c..43621250ae2 100644
--- a/tools/generated_files/331/supportedExprs.csv
+++ b/tools/generated_files/331/supportedExprs.csv
@@ -265,6 +265,8 @@ GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,
 GreaterThanOrEqual,S,`>=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Greatest,S,`greatest`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Greatest,S,`greatest`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+HiveHash,S, ,None,project,input,S,S,S,S,S,S,S,S,NS,S,NS,S,NS,NS,NS,NS,NS,NS,NS,NS
+HiveHash,S, ,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hypot,S,`hypot`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/332/operatorsScore.csv b/tools/generated_files/332/operatorsScore.csv
index b1168d0fa6b..6b4cd0349c6 100644
--- a/tools/generated_files/332/operatorsScore.csv
+++ b/tools/generated_files/332/operatorsScore.csv
@@ -136,6 +136,7 @@ GreaterThan,4
 GreaterThanOrEqual,4
 Greatest,4
 HiveGenericUDF,4
+HiveHash,4
 HiveSimpleUDF,4
 Hour,4
 Hypot,4
diff --git a/tools/generated_files/332/supportedExprs.csv b/tools/generated_files/332/supportedExprs.csv
index 44a7a8b977c..43621250ae2 100644
--- a/tools/generated_files/332/supportedExprs.csv
+++ b/tools/generated_files/332/supportedExprs.csv
@@ -265,6 +265,8 @@ GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,
 GreaterThanOrEqual,S,`>=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Greatest,S,`greatest`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Greatest,S,`greatest`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+HiveHash,S, ,None,project,input,S,S,S,S,S,S,S,S,NS,S,NS,S,NS,NS,NS,NS,NS,NS,NS,NS
+HiveHash,S, ,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hypot,S,`hypot`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/332cdh/operatorsScore.csv b/tools/generated_files/332cdh/operatorsScore.csv
index b1168d0fa6b..6b4cd0349c6 100644
--- a/tools/generated_files/332cdh/operatorsScore.csv
+++ b/tools/generated_files/332cdh/operatorsScore.csv
@@ -136,6 +136,7 @@ GreaterThan,4
 GreaterThanOrEqual,4
 Greatest,4
 HiveGenericUDF,4
+HiveHash,4
 HiveSimpleUDF,4
 Hour,4
 Hypot,4
diff --git a/tools/generated_files/332cdh/supportedExprs.csv b/tools/generated_files/332cdh/supportedExprs.csv
index 44a7a8b977c..43621250ae2 100644
--- a/tools/generated_files/332cdh/supportedExprs.csv
+++ b/tools/generated_files/332cdh/supportedExprs.csv
@@ -265,6 +265,8 @@ GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,
 GreaterThanOrEqual,S,`>=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Greatest,S,`greatest`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Greatest,S,`greatest`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+HiveHash,S, ,None,project,input,S,S,S,S,S,S,S,S,NS,S,NS,S,NS,NS,NS,NS,NS,NS,NS,NS
+HiveHash,S, ,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hypot,S,`hypot`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/333/operatorsScore.csv b/tools/generated_files/333/operatorsScore.csv
index b1168d0fa6b..6b4cd0349c6 100644
--- a/tools/generated_files/333/operatorsScore.csv
+++ b/tools/generated_files/333/operatorsScore.csv
@@ -136,6 +136,7 @@ GreaterThan,4
 GreaterThanOrEqual,4
 Greatest,4
 HiveGenericUDF,4
+HiveHash,4
 HiveSimpleUDF,4
 Hour,4
 Hypot,4
diff --git a/tools/generated_files/333/supportedExprs.csv b/tools/generated_files/333/supportedExprs.csv
index 44a7a8b977c..43621250ae2 100644
--- a/tools/generated_files/333/supportedExprs.csv
+++ b/tools/generated_files/333/supportedExprs.csv
@@ -265,6 +265,8 @@ GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,
 GreaterThanOrEqual,S,`>=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Greatest,S,`greatest`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Greatest,S,`greatest`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+HiveHash,S, ,None,project,input,S,S,S,S,S,S,S,S,NS,S,NS,S,NS,NS,NS,NS,NS,NS,NS,NS
+HiveHash,S, ,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hypot,S,`hypot`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/334/operatorsScore.csv b/tools/generated_files/334/operatorsScore.csv
index b1168d0fa6b..6b4cd0349c6 100644
--- a/tools/generated_files/334/operatorsScore.csv
+++ b/tools/generated_files/334/operatorsScore.csv
@@ -136,6 +136,7 @@ GreaterThan,4
 GreaterThanOrEqual,4
 Greatest,4
 HiveGenericUDF,4
+HiveHash,4
 HiveSimpleUDF,4
 Hour,4
 Hypot,4
diff --git a/tools/generated_files/334/supportedExprs.csv b/tools/generated_files/334/supportedExprs.csv
index 44a7a8b977c..43621250ae2 100644
--- a/tools/generated_files/334/supportedExprs.csv
+++ b/tools/generated_files/334/supportedExprs.csv
@@ -265,6 +265,8 @@ GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,
 GreaterThanOrEqual,S,`>=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Greatest,S,`greatest`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Greatest,S,`greatest`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+HiveHash,S, ,None,project,input,S,S,S,S,S,S,S,S,NS,S,NS,S,NS,NS,NS,NS,NS,NS,NS,NS
+HiveHash,S, ,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hypot,S,`hypot`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/340/operatorsScore.csv b/tools/generated_files/340/operatorsScore.csv
index 161fcc90e7b..575e3ba3617 100644
--- a/tools/generated_files/340/operatorsScore.csv
+++ b/tools/generated_files/340/operatorsScore.csv
@@ -137,6 +137,7 @@ GreaterThan,4
 GreaterThanOrEqual,4
 Greatest,4
 HiveGenericUDF,4
+HiveHash,4
 HiveSimpleUDF,4
 Hour,4
 Hypot,4
diff --git a/tools/generated_files/340/supportedExprs.csv b/tools/generated_files/340/supportedExprs.csv
index 63bfecc4ce3..6a997b6c8e9 100644
--- a/tools/generated_files/340/supportedExprs.csv
+++ b/tools/generated_files/340/supportedExprs.csv
@@ -265,6 +265,8 @@ GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,
 GreaterThanOrEqual,S,`>=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Greatest,S,`greatest`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Greatest,S,`greatest`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+HiveHash,S, ,None,project,input,S,S,S,S,S,S,S,S,NS,S,NS,S,NS,NS,NS,NS,NS,NS,NS,NS
+HiveHash,S, ,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hypot,S,`hypot`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/341/operatorsScore.csv b/tools/generated_files/341/operatorsScore.csv
index 161fcc90e7b..575e3ba3617 100644
--- a/tools/generated_files/341/operatorsScore.csv
+++ b/tools/generated_files/341/operatorsScore.csv
@@ -137,6 +137,7 @@ GreaterThan,4
 GreaterThanOrEqual,4
 Greatest,4
 HiveGenericUDF,4
+HiveHash,4
 HiveSimpleUDF,4
 Hour,4
 Hypot,4
diff --git a/tools/generated_files/341/supportedExprs.csv b/tools/generated_files/341/supportedExprs.csv
index 63bfecc4ce3..6a997b6c8e9 100644
--- a/tools/generated_files/341/supportedExprs.csv
+++ b/tools/generated_files/341/supportedExprs.csv
@@ -265,6 +265,8 @@ GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,
 GreaterThanOrEqual,S,`>=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Greatest,S,`greatest`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Greatest,S,`greatest`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+HiveHash,S, ,None,project,input,S,S,S,S,S,S,S,S,NS,S,NS,S,NS,NS,NS,NS,NS,NS,NS,NS
+HiveHash,S, ,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hypot,S,`hypot`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/342/operatorsScore.csv b/tools/generated_files/342/operatorsScore.csv
index 161fcc90e7b..575e3ba3617 100644
--- a/tools/generated_files/342/operatorsScore.csv
+++ b/tools/generated_files/342/operatorsScore.csv
@@ -137,6 +137,7 @@ GreaterThan,4
 GreaterThanOrEqual,4
 Greatest,4
 HiveGenericUDF,4
+HiveHash,4
 HiveSimpleUDF,4
 Hour,4
 Hypot,4
diff --git a/tools/generated_files/342/supportedExprs.csv b/tools/generated_files/342/supportedExprs.csv
index 63bfecc4ce3..6a997b6c8e9 100644
--- a/tools/generated_files/342/supportedExprs.csv
+++ b/tools/generated_files/342/supportedExprs.csv
@@ -265,6 +265,8 @@ GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,
 GreaterThanOrEqual,S,`>=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Greatest,S,`greatest`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Greatest,S,`greatest`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+HiveHash,S, ,None,project,input,S,S,S,S,S,S,S,S,NS,S,NS,S,NS,NS,NS,NS,NS,NS,NS,NS
+HiveHash,S, ,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hypot,S,`hypot`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/343/operatorsScore.csv b/tools/generated_files/343/operatorsScore.csv
index 161fcc90e7b..575e3ba3617 100644
--- a/tools/generated_files/343/operatorsScore.csv
+++ b/tools/generated_files/343/operatorsScore.csv
@@ -137,6 +137,7 @@ GreaterThan,4
 GreaterThanOrEqual,4
 Greatest,4
 HiveGenericUDF,4
+HiveHash,4
 HiveSimpleUDF,4
 Hour,4
 Hypot,4
diff --git a/tools/generated_files/343/supportedExprs.csv b/tools/generated_files/343/supportedExprs.csv
index 63bfecc4ce3..6a997b6c8e9 100644
--- a/tools/generated_files/343/supportedExprs.csv
+++ b/tools/generated_files/343/supportedExprs.csv
@@ -265,6 +265,8 @@ GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,
 GreaterThanOrEqual,S,`>=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Greatest,S,`greatest`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Greatest,S,`greatest`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+HiveHash,S, ,None,project,input,S,S,S,S,S,S,S,S,NS,S,NS,S,NS,NS,NS,NS,NS,NS,NS,NS
+HiveHash,S, ,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hypot,S,`hypot`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/350/operatorsScore.csv b/tools/generated_files/350/operatorsScore.csv
index d98c632ac68..17b19e9a810 100644
--- a/tools/generated_files/350/operatorsScore.csv
+++ b/tools/generated_files/350/operatorsScore.csv
@@ -138,6 +138,7 @@ GreaterThan,4
 GreaterThanOrEqual,4
 Greatest,4
 HiveGenericUDF,4
+HiveHash,4
 HiveSimpleUDF,4
 Hour,4
 Hypot,4
diff --git a/tools/generated_files/350/supportedExprs.csv b/tools/generated_files/350/supportedExprs.csv
index 6c34e85c530..1bf25af11a3 100644
--- a/tools/generated_files/350/supportedExprs.csv
+++ b/tools/generated_files/350/supportedExprs.csv
@@ -265,6 +265,8 @@ GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,
 GreaterThanOrEqual,S,`>=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Greatest,S,`greatest`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Greatest,S,`greatest`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+HiveHash,S, ,None,project,input,S,S,S,S,S,S,S,S,NS,S,NS,S,NS,NS,NS,NS,NS,NS,NS,NS
+HiveHash,S, ,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hypot,S,`hypot`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
diff --git a/tools/generated_files/351/operatorsScore.csv b/tools/generated_files/351/operatorsScore.csv
index d98c632ac68..17b19e9a810 100644
--- a/tools/generated_files/351/operatorsScore.csv
+++ b/tools/generated_files/351/operatorsScore.csv
@@ -138,6 +138,7 @@ GreaterThan,4
 GreaterThanOrEqual,4
 Greatest,4
 HiveGenericUDF,4
+HiveHash,4
 HiveSimpleUDF,4
 Hour,4
 Hypot,4
diff --git a/tools/generated_files/351/supportedExprs.csv b/tools/generated_files/351/supportedExprs.csv
index 6c34e85c530..1bf25af11a3 100644
--- a/tools/generated_files/351/supportedExprs.csv
+++ b/tools/generated_files/351/supportedExprs.csv
@@ -265,6 +265,8 @@ GreaterThanOrEqual,S,`>=`,None,AST,rhs,S,S,S,S,S,NS,NS,S,PS,S,NS,NS,NS,NS,NS,NA,
 GreaterThanOrEqual,S,`>=`,None,AST,result,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Greatest,S,`greatest`,None,project,param,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
 Greatest,S,`greatest`,None,project,result,S,S,S,S,S,S,S,S,PS,S,S,S,NS,NS,NS,NA,NS,NS,NA,NA
+HiveHash,S, ,None,project,input,S,S,S,S,S,S,S,S,NS,S,NS,S,NS,NS,NS,NS,NS,NS,NS,NS
+HiveHash,S, ,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,input,NA,NA,NA,NA,NA,NA,NA,NA,PS,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hour,S,`hour`,None,project,result,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
 Hypot,S,`hypot`,None,project,lhs,NA,NA,NA,NA,NA,NA,S,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA