Skip to content

Commit

Permalink
Expose max nesting depth in hash function to plugin (#2680)
Browse files Browse the repository at this point in the history
* Expose max nesting depth in hash function to plugin

Signed-off-by: Yan Feng <[email protected]>

* Format C++ code

Signed-off-by: Yan Feng <[email protected]>

* Remove MAX_NESTED_DEPTH definition from xxhash64

Signed-off-by: Yan Feng <[email protected]>

* Update src/main/cpp/src/HashJni.cpp

Co-authored-by: Chong Gao <[email protected]>

* Update src/main/cpp/src/HashJni.cpp

Co-authored-by: Nghia Truong <[email protected]>

* Rename MAX_NESTED_DEPTH to MAX_STACK_DEPTH

Signed-off-by: Yan Feng <[email protected]>

---------

Signed-off-by: Yan Feng <[email protected]>
Co-authored-by: Chong Gao <[email protected]>
Co-authored-by: Nghia Truong <[email protected]>
  • Loading branch information
3 people authored Dec 12, 2024
1 parent de713e8 commit 32834e6
Show file tree
Hide file tree
Showing 5 changed files with 12 additions and 4 deletions.
5 changes: 5 additions & 0 deletions src/main/cpp/src/HashJni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@

extern "C" {

/**
 * JNI entry point backing the Java native method
 * `com.nvidia.spark.rapids.jni.Hash.getMaxStackDepth()`.
 *
 * Hands the compile-time constant `spark_rapids_jni::MAX_STACK_DEPTH` back to the
 * caller as a `jint`. Neither the JNI environment nor the class argument is used.
 */
JNIEXPORT jint JNICALL Java_com_nvidia_spark_rapids_jni_Hash_getMaxStackDepth(JNIEnv* env, jclass)
{
  constexpr jint max_stack_depth = static_cast<jint>(spark_rapids_jni::MAX_STACK_DEPTH);
  return max_stack_depth;
}

JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_Hash_murmurHash32(
JNIEnv* env, jclass, jint seed, jlongArray column_handles)
{
Expand Down
1 change: 1 addition & 0 deletions src/main/cpp/src/hash.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
namespace spark_rapids_jni {

// Default seed value for xxhash64.
constexpr int64_t DEFAULT_XXHASH64_SEED = 42;
// Single shared definition of the maximum stack depth. hive_hash.cu and xxhash64.cu include
// this header instead of defining their own copy, and the JNI layer returns this value from
// Hash.getMaxStackDepth().
// NOTE(review): presumably this bounds the nesting depth of nested-type input the hash
// kernels can process -- confirm against the kernels before relying on that reading.
constexpr int MAX_STACK_DEPTH = 8;

/**
* @brief Computes the murmur32 hash value of each row in the input set of columns.
Expand Down
3 changes: 1 addition & 2 deletions src/main/cpp/src/hive_hash.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/

#include "hash.cuh"
#include "hash.hpp"

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
Expand All @@ -37,8 +38,6 @@ using hive_hash_value_t = int32_t;
constexpr hive_hash_value_t HIVE_HASH_FACTOR = 31;
constexpr hive_hash_value_t HIVE_INIT_HASH = 0;

constexpr int MAX_STACK_DEPTH = 8;

/**
 * @brief Hash contribution of a 32-bit integer key: the key value itself, unchanged.
 *
 * @param key The 32-bit integer to hash
 * @return `key` as a `hive_hash_value_t`
 */
hive_hash_value_t __device__ inline compute_int(int32_t key)
{
  return static_cast<hive_hash_value_t>(key);
}

hive_hash_value_t __device__ inline compute_long(int64_t key)
Expand Down
3 changes: 1 addition & 2 deletions src/main/cpp/src/xxhash64.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/

#include "hash.cuh"
#include "hash.hpp"

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/utilities/algorithm.cuh>
Expand All @@ -34,8 +35,6 @@ namespace {
using hash_value_type = int64_t;
using half_size_type = int32_t;

constexpr int MAX_STACK_DEPTH = 8;

constexpr __device__ inline int64_t rotate_bits_left_signed(hash_value_type h, int8_t r)
{
return (h << r) | (h >> (64 - r)) & ~(-1 << r);
Expand Down
4 changes: 4 additions & 0 deletions src/main/java/com/nvidia/spark/rapids/jni/Hash.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ public class Hash {
// there doesn't appear to be a useful constant in spark to reference. this could break.
static final long DEFAULT_XXHASH64_SEED = 42;

static {
  // Load the native library first. Java runs static initializers and static field
  // initializers in textual order, so this block must precede any static field whose
  // initializer calls a native method (see MAX_STACK_DEPTH below). Otherwise, if this is
  // the first class to need the library, class initialization fails with an
  // UnsatisfiedLinkError.
  NativeDepsLoader.loadNativeDeps();
}

/**
 * Maximum stack depth as reported by the native hash implementation, read once from
 * native code when this class is initialized.
 */
public static final int MAX_STACK_DEPTH = getMaxStackDepth();
Expand Down Expand Up @@ -100,6 +102,8 @@ public static ColumnVector hiveHash(ColumnView columns[]) {
return new ColumnVector(hiveHash(columnViews));
}

/**
 * Native accessor for the C++ constant {@code spark_rapids_jni::MAX_STACK_DEPTH}.
 * Implemented in HashJni.cpp; the native library must already be loaded when this is called.
 *
 * @return the maximum stack depth defined by the native hash code
 */
private static native int getMaxStackDepth();

private static native long murmurHash32(int seed, long[] viewHandles) throws CudfException;

private static native long xxhash64(long seed, long[] viewHandles) throws CudfException;
Expand Down

0 comments on commit 32834e6

Please sign in to comment.