Skip to content

Commit

Permalink
Update jni
Browse files: browse the repository at this point in the history
Signed-off-by: Nghia Truong <[email protected]>
  • Loading branch information
ttnghia committed Dec 21, 2023
1 parent 98dc423 commit a87f3e4
Show file tree
Hide file tree
Showing 7 changed files with 82 additions and 3,794 deletions.
8 changes: 2 additions & 6 deletions src/main/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -94,11 +94,8 @@ include(cmake/Modules/ConfigureCUDA.cmake) # set other CUDA compilation flags
# ##################################################################################################
# * dependencies ----------------------------------------------------------------------------------

# find libcu++
include(${rapids-cmake-dir}/cpm/libcudacxx.cmake)

# find thrust/cub
include(${CUDF_DIR}/cpp/cmake/thirdparty/get_thrust.cmake)
# find CCCL
include(${CUDF_DIR}/cpp/cmake/thirdparty/get_cccl.cmake)

# JNI
find_package(JNI REQUIRED)
Expand Down Expand Up @@ -174,7 +171,6 @@ add_library(
src/map_utils.cu
src/murmur_hash.cu
src/parse_uri.cu
src/row_conversion.cu
src/timezones.cu
src/utilities.cu
src/xxhash64.cu
Expand Down
100 changes: 43 additions & 57 deletions src/main/cpp/benchmarks/row_conversion.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -24,21 +24,16 @@
#include <cudf/strings/strings_column_view.hpp>
#include <cudf_test/column_utilities.hpp>

void fixed_width(nvbench::state& state)
{
void fixed_width(nvbench::state &state) {
cudf::size_type const n_rows{(cudf::size_type)state.get_int64("num_rows")};
auto const direction = state.get_string("direction");
auto const table = create_random_table(cycle_dtypes({cudf::type_id::INT8,
cudf::type_id::INT32,
cudf::type_id::INT16,
cudf::type_id::INT64,
cudf::type_id::INT32,
cudf::type_id::BOOL8,
cudf::type_id::UINT16,
cudf::type_id::UINT8,
cudf::type_id::UINT64},
212),
row_count{n_rows});
auto const table = create_random_table(
cycle_dtypes(
{cudf::type_id::INT8, cudf::type_id::INT32, cudf::type_id::INT16,
cudf::type_id::INT64, cudf::type_id::INT32, cudf::type_id::BOOL8,
cudf::type_id::UINT16, cudf::type_id::UINT8, cudf::type_id::UINT64},
212),
row_count{n_rows});

std::vector<cudf::data_type> schema;
cudf::size_type bytes_per_row = 0;
Expand All @@ -48,15 +43,15 @@ void fixed_width(nvbench::state& state)
bytes_per_row += cudf::size_of(t);
}

auto rows = spark_rapids_jni::convert_to_rows_fixed_width_optimized(table->view());
auto rows = cudf::convert_to_rows_fixed_width_optimized(table->view());

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch &launch) {
if (direction == "to row") {
auto _rows = spark_rapids_jni::convert_to_rows_fixed_width_optimized(table->view());
auto _rows = cudf::convert_to_rows_fixed_width_optimized(table->view());
} else {
for (auto const& r : rows) {
for (auto const &r : rows) {
cudf::lists_column_view const l(r->view());
auto out = spark_rapids_jni::convert_from_rows_fixed_width_optimized(l, schema);
auto out = cudf::convert_from_rows_fixed_width_optimized(l, schema);
}
}
});
Expand All @@ -65,47 +60,38 @@ void fixed_width(nvbench::state& state)
state.add_global_memory_reads<int64_t>(bytes_per_row * table->num_rows());
}

static void variable_or_fixed_width(nvbench::state& state)
{
static void variable_or_fixed_width(nvbench::state &state) {
cudf::size_type const n_rows{(cudf::size_type)state.get_int64("num_rows")};
auto const direction = state.get_string("direction");
auto const direction = state.get_string("direction");
auto const include_strings = state.get_string("strings");

if (n_rows > 1 * 1024 * 1024 && include_strings == "include strings") {
state.skip("Too many rows for strings will cause memory issues");
return;
}

std::vector<cudf::type_id> const table_types = [&]() -> std::vector<cudf::type_id> {
std::vector<cudf::type_id> const table_types =
[&]() -> std::vector<cudf::type_id> {
if (include_strings == "include strings") {
return {cudf::type_id::INT8,
cudf::type_id::INT32,
cudf::type_id::INT16,
cudf::type_id::INT64,
cudf::type_id::INT32,
cudf::type_id::BOOL8,
cudf::type_id::STRING,
cudf::type_id::UINT16,
cudf::type_id::UINT8,
cudf::type_id::UINT64};
return {cudf::type_id::INT8, cudf::type_id::INT32,
cudf::type_id::INT16, cudf::type_id::INT64,
cudf::type_id::INT32, cudf::type_id::BOOL8,
cudf::type_id::STRING, cudf::type_id::UINT16,
cudf::type_id::UINT8, cudf::type_id::UINT64};
} else {
return {cudf::type_id::INT8,
cudf::type_id::INT32,
cudf::type_id::INT16,
cudf::type_id::INT64,
cudf::type_id::INT32,
cudf::type_id::BOOL8,
cudf::type_id::UINT16,
cudf::type_id::UINT8,
cudf::type_id::UINT64};
return {
cudf::type_id::INT8, cudf::type_id::INT32, cudf::type_id::INT16,
cudf::type_id::INT64, cudf::type_id::INT32, cudf::type_id::BOOL8,
cudf::type_id::UINT16, cudf::type_id::UINT8, cudf::type_id::UINT64};
}
}();

auto const table = create_random_table(cycle_dtypes(table_types, 155), row_count{n_rows});
auto const table =
create_random_table(cycle_dtypes(table_types, 155), row_count{n_rows});

std::vector<cudf::data_type> schema;
cudf::size_type bytes_per_row = 0;
cudf::size_type string_bytes = 0;
cudf::size_type string_bytes = 0;
for (int i = 0; i < table->num_columns(); ++i) {
auto t = table->get_column(i).type();
schema.push_back(t);
Expand All @@ -117,16 +103,16 @@ static void variable_or_fixed_width(nvbench::state& state)
}
}

auto rows = spark_rapids_jni::convert_to_rows(table->view());
auto rows = cudf::convert_to_rows(table->view());

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
auto new_rows = spark_rapids_jni::convert_to_rows(table->view());
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch &launch) {
auto new_rows = cudf::convert_to_rows(table->view());
if (direction == "to row") {
auto _rows = spark_rapids_jni::convert_to_rows(table->view());
auto _rows = cudf::convert_to_rows(table->view());
} else {
for (auto const& r : rows) {
for (auto const &r : rows) {
cudf::lists_column_view const l(r->view());
auto out = spark_rapids_jni::convert_from_rows(l, schema);
auto out = cudf::convert_from_rows(l, schema);
}
}
});
Expand All @@ -136,12 +122,12 @@ static void variable_or_fixed_width(nvbench::state& state)
}

NVBENCH_BENCH(fixed_width)
.set_name("Fixed Width Only")
.add_int64_axis("num_rows", {1 * 1024 * 1024, 4 * 1024 * 1024})
.add_string_axis("direction", {"to row", "from row"});
.set_name("Fixed Width Only")
.add_int64_axis("num_rows", {1 * 1024 * 1024, 4 * 1024 * 1024})
.add_string_axis("direction", {"to row", "from row"});

NVBENCH_BENCH(variable_or_fixed_width)
.set_name("Fixed or Variable Width")
.add_int64_axis("num_rows", {1 * 1024 * 1024, 4 * 1024 * 1024})
.add_string_axis("direction", {"to row", "from row"})
.add_string_axis("strings", {"include strings", "no strings"});
.set_name("Fixed or Variable Width")
.add_int64_axis("num_rows", {1 * 1024 * 1024, 4 * 1024 * 1024})
.add_string_axis("direction", {"to row", "from row"})
.add_string_axis("strings", {"include strings", "no strings"});
74 changes: 37 additions & 37 deletions src/main/cpp/src/RowConversionJni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,97 +21,97 @@
extern "C" {

JNIEXPORT jlongArray JNICALL
Java_com_nvidia_spark_rapids_jni_RowConversion_convertToRowsFixedWidthOptimized(JNIEnv* env,
jclass,
jlong input_table)
{
Java_com_nvidia_spark_rapids_jni_RowConversion_convertToRowsFixedWidthOptimized(
JNIEnv *env, jclass, jlong input_table) {
JNI_NULL_CHECK(env, input_table, "input table is null", 0);

try {
cudf::jni::auto_set_device(env);
cudf::table_view const* n_input_table = reinterpret_cast<cudf::table_view const*>(input_table);
cudf::table_view const *n_input_table =
reinterpret_cast<cudf::table_view const *>(input_table);
std::vector<std::unique_ptr<cudf::column>> cols =
spark_rapids_jni::convert_to_rows_fixed_width_optimized(*n_input_table);
cudf::jni::convert_to_rows_fixed_width_optimized(*n_input_table);
int const num_columns = cols.size();
cudf::jni::native_jlongArray outcol_handles(env, num_columns);
std::transform(cols.begin(), cols.end(), outcol_handles.begin(), [](auto& col) {
return cudf::jni::release_as_jlong(col);
});
std::transform(cols.begin(), cols.end(), outcol_handles.begin(),
[](auto &col) { return cudf::jni::release_as_jlong(col); });
return outcol_handles.get_jArray();
}
CATCH_STD(env, 0);
}

JNIEXPORT jlongArray JNICALL
Java_com_nvidia_spark_rapids_jni_RowConversion_convertToRows(JNIEnv* env, jclass, jlong input_table)
{
Java_com_nvidia_spark_rapids_jni_RowConversion_convertToRows(
JNIEnv *env, jclass, jlong input_table) {
JNI_NULL_CHECK(env, input_table, "input table is null", 0);

try {
cudf::jni::auto_set_device(env);
cudf::table_view const* n_input_table = reinterpret_cast<cudf::table_view const*>(input_table);
cudf::table_view const *n_input_table =
reinterpret_cast<cudf::table_view const *>(input_table);
std::vector<std::unique_ptr<cudf::column>> cols =
spark_rapids_jni::convert_to_rows(*n_input_table);
cudf::jni::convert_to_rows(*n_input_table);
int const num_columns = cols.size();
cudf::jni::native_jlongArray outcol_handles(env, num_columns);
std::transform(cols.begin(), cols.end(), outcol_handles.begin(), [](auto& col) {
return cudf::jni::release_as_jlong(col);
});
std::transform(cols.begin(), cols.end(), outcol_handles.begin(),
[](auto &col) { return cudf::jni::release_as_jlong(col); });
return outcol_handles.get_jArray();
}
CATCH_STD(env, 0);
}

JNIEXPORT jlongArray JNICALL
Java_com_nvidia_spark_rapids_jni_RowConversion_convertFromRowsFixedWidthOptimized(
JNIEnv* env, jclass, jlong input_column, jintArray types, jintArray scale)
{
JNIEnv *env, jclass, jlong input_column, jintArray types, jintArray scale) {
JNI_NULL_CHECK(env, input_column, "input column is null", 0);
JNI_NULL_CHECK(env, types, "types is null", 0);

try {
cudf::jni::auto_set_device(env);
cudf::lists_column_view const list_input{*reinterpret_cast<cudf::column_view*>(input_column)};
cudf::lists_column_view const list_input{
*reinterpret_cast<cudf::column_view *>(input_column)};
cudf::jni::native_jintArray n_types(env, types);
cudf::jni::native_jintArray n_scale(env, scale);
if (n_types.size() != n_scale.size()) {
JNI_THROW_NEW(env, cudf::jni::ILLEGAL_ARG_CLASS, "types and scales must match size", NULL);
JNI_THROW_NEW(env, cudf::jni::ILLEGAL_ARG_CLASS,
"types and scales must match size", NULL);
}
std::vector<cudf::data_type> types_vec;
std::transform(n_types.begin(),
n_types.end(),
n_scale.begin(),
std::back_inserter(types_vec),
[](jint type, jint scale) { return cudf::jni::make_data_type(type, scale); });
std::transform(n_types.begin(), n_types.end(), n_scale.begin(),
std::back_inserter(types_vec), [](jint type, jint scale) {
return cudf::jni::make_data_type(type, scale);
});
std::unique_ptr<cudf::table> result =
spark_rapids_jni::convert_from_rows_fixed_width_optimized(list_input, types_vec);
cudf::jni::convert_from_rows_fixed_width_optimized(list_input,
types_vec);
return cudf::jni::convert_table_for_return(env, result);
}
CATCH_STD(env, 0);
}

JNIEXPORT jlongArray JNICALL Java_com_nvidia_spark_rapids_jni_RowConversion_convertFromRows(
JNIEnv* env, jclass, jlong input_column, jintArray types, jintArray scale)
{
JNIEXPORT jlongArray JNICALL
Java_com_nvidia_spark_rapids_jni_RowConversion_convertFromRows(
JNIEnv *env, jclass, jlong input_column, jintArray types, jintArray scale) {
JNI_NULL_CHECK(env, input_column, "input column is null", 0);
JNI_NULL_CHECK(env, types, "types is null", 0);

try {
cudf::jni::auto_set_device(env);
cudf::lists_column_view const list_input{*reinterpret_cast<cudf::column_view*>(input_column)};
cudf::lists_column_view const list_input{
*reinterpret_cast<cudf::column_view *>(input_column)};
cudf::jni::native_jintArray n_types(env, types);
cudf::jni::native_jintArray n_scale(env, scale);
if (n_types.size() != n_scale.size()) {
JNI_THROW_NEW(env, cudf::jni::ILLEGAL_ARG_CLASS, "types and scales must match size", NULL);
JNI_THROW_NEW(env, cudf::jni::ILLEGAL_ARG_CLASS,
"types and scales must match size", NULL);
}
std::vector<cudf::data_type> types_vec;
std::transform(n_types.begin(),
n_types.end(),
n_scale.begin(),
std::back_inserter(types_vec),
[](jint type, jint scale) { return cudf::jni::make_data_type(type, scale); });
std::transform(n_types.begin(), n_types.end(), n_scale.begin(),
std::back_inserter(types_vec), [](jint type, jint scale) {
return cudf::jni::make_data_type(type, scale);
});
std::unique_ptr<cudf::table> result =
spark_rapids_jni::convert_from_rows(list_input, types_vec);
cudf::jni::convert_from_rows(list_input, types_vec);
return cudf::jni::convert_table_for_return(env, result);
}
CATCH_STD(env, 0);
Expand Down
Loading

0 comments on commit a87f3e4

Please sign in to comment.