Update jni to remove row conversion code #1668

Closed
wants to merge 6 commits into from
Changes from 1 commit

8 changes: 2 additions & 6 deletions src/main/cpp/CMakeLists.txt
@@ -94,11 +94,8 @@ include(cmake/Modules/ConfigureCUDA.cmake) # set other CUDA compilation flags
# ##################################################################################################
# * dependencies ----------------------------------------------------------------------------------

# find libcu++
include(${rapids-cmake-dir}/cpm/libcudacxx.cmake)

# find thrust/cub
include(${CUDF_DIR}/cpp/cmake/thirdparty/get_thrust.cmake)
# find CCCL
include(${CUDF_DIR}/cpp/cmake/thirdparty/get_cccl.cmake)

# JNI
find_package(JNI REQUIRED)
@@ -174,7 +171,6 @@ add_library(
src/map_utils.cu
src/murmur_hash.cu
src/parse_uri.cu
src/row_conversion.cu
src/timezones.cu
src/utilities.cu
src/xxhash64.cu
100 changes: 43 additions & 57 deletions src/main/cpp/benchmarks/row_conversion.cpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -24,21 +24,16 @@
#include <cudf/strings/strings_column_view.hpp>
#include <cudf_test/column_utilities.hpp>

void fixed_width(nvbench::state& state)
{
void fixed_width(nvbench::state &state) {
cudf::size_type const n_rows{(cudf::size_type)state.get_int64("num_rows")};
auto const direction = state.get_string("direction");
auto const table = create_random_table(cycle_dtypes({cudf::type_id::INT8,
cudf::type_id::INT32,
cudf::type_id::INT16,
cudf::type_id::INT64,
cudf::type_id::INT32,
cudf::type_id::BOOL8,
cudf::type_id::UINT16,
cudf::type_id::UINT8,
cudf::type_id::UINT64},
212),
row_count{n_rows});
auto const table = create_random_table(
cycle_dtypes(
{cudf::type_id::INT8, cudf::type_id::INT32, cudf::type_id::INT16,
cudf::type_id::INT64, cudf::type_id::INT32, cudf::type_id::BOOL8,
cudf::type_id::UINT16, cudf::type_id::UINT8, cudf::type_id::UINT64},
212),
row_count{n_rows});

std::vector<cudf::data_type> schema;
cudf::size_type bytes_per_row = 0;
@@ -48,15 +43,15 @@ void fixed_width(nvbench::state& state)
bytes_per_row += cudf::size_of(t);
}

auto rows = spark_rapids_jni::convert_to_rows_fixed_width_optimized(table->view());
auto rows = cudf::convert_to_rows_fixed_width_optimized(table->view());

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch &launch) {
if (direction == "to row") {
auto _rows = spark_rapids_jni::convert_to_rows_fixed_width_optimized(table->view());
auto _rows = cudf::convert_to_rows_fixed_width_optimized(table->view());
} else {
for (auto const& r : rows) {
for (auto const &r : rows) {
cudf::lists_column_view const l(r->view());
auto out = spark_rapids_jni::convert_from_rows_fixed_width_optimized(l, schema);
auto out = cudf::convert_from_rows_fixed_width_optimized(l, schema);
}
}
});
@@ -65,47 +60,38 @@ void fixed_width(nvbench::state& state)
state.add_global_memory_reads<int64_t>(bytes_per_row * table->num_rows());
}

static void variable_or_fixed_width(nvbench::state& state)
{
static void variable_or_fixed_width(nvbench::state &state) {
cudf::size_type const n_rows{(cudf::size_type)state.get_int64("num_rows")};
auto const direction = state.get_string("direction");
auto const direction = state.get_string("direction");
auto const include_strings = state.get_string("strings");

if (n_rows > 1 * 1024 * 1024 && include_strings == "include strings") {
state.skip("Too many rows for strings will cause memory issues");
return;
}

std::vector<cudf::type_id> const table_types = [&]() -> std::vector<cudf::type_id> {
std::vector<cudf::type_id> const table_types =
[&]() -> std::vector<cudf::type_id> {
if (include_strings == "include strings") {
return {cudf::type_id::INT8,
cudf::type_id::INT32,
cudf::type_id::INT16,
cudf::type_id::INT64,
cudf::type_id::INT32,
cudf::type_id::BOOL8,
cudf::type_id::STRING,
cudf::type_id::UINT16,
cudf::type_id::UINT8,
cudf::type_id::UINT64};
return {cudf::type_id::INT8, cudf::type_id::INT32,
cudf::type_id::INT16, cudf::type_id::INT64,
cudf::type_id::INT32, cudf::type_id::BOOL8,
cudf::type_id::STRING, cudf::type_id::UINT16,
cudf::type_id::UINT8, cudf::type_id::UINT64};
} else {
return {cudf::type_id::INT8,
cudf::type_id::INT32,
cudf::type_id::INT16,
cudf::type_id::INT64,
cudf::type_id::INT32,
cudf::type_id::BOOL8,
cudf::type_id::UINT16,
cudf::type_id::UINT8,
cudf::type_id::UINT64};
return {
cudf::type_id::INT8, cudf::type_id::INT32, cudf::type_id::INT16,
cudf::type_id::INT64, cudf::type_id::INT32, cudf::type_id::BOOL8,
cudf::type_id::UINT16, cudf::type_id::UINT8, cudf::type_id::UINT64};
}
}();

auto const table = create_random_table(cycle_dtypes(table_types, 155), row_count{n_rows});
auto const table =
create_random_table(cycle_dtypes(table_types, 155), row_count{n_rows});

std::vector<cudf::data_type> schema;
cudf::size_type bytes_per_row = 0;
cudf::size_type string_bytes = 0;
cudf::size_type string_bytes = 0;
for (int i = 0; i < table->num_columns(); ++i) {
auto t = table->get_column(i).type();
schema.push_back(t);
@@ -117,16 +103,16 @@ static void variable_or_fixed_width(nvbench::state& state)
}
}

auto rows = spark_rapids_jni::convert_to_rows(table->view());
auto rows = cudf::convert_to_rows(table->view());

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
auto new_rows = spark_rapids_jni::convert_to_rows(table->view());
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch &launch) {
auto new_rows = cudf::convert_to_rows(table->view());
if (direction == "to row") {
auto _rows = spark_rapids_jni::convert_to_rows(table->view());
auto _rows = cudf::convert_to_rows(table->view());
} else {
for (auto const& r : rows) {
for (auto const &r : rows) {
cudf::lists_column_view const l(r->view());
auto out = spark_rapids_jni::convert_from_rows(l, schema);
auto out = cudf::convert_from_rows(l, schema);
}
}
});
@@ -136,12 +122,12 @@ static void variable_or_fixed_width(nvbench::state& state)
}

NVBENCH_BENCH(fixed_width)
.set_name("Fixed Width Only")
.add_int64_axis("num_rows", {1 * 1024 * 1024, 4 * 1024 * 1024})
.add_string_axis("direction", {"to row", "from row"});
.set_name("Fixed Width Only")
.add_int64_axis("num_rows", {1 * 1024 * 1024, 4 * 1024 * 1024})
.add_string_axis("direction", {"to row", "from row"});

NVBENCH_BENCH(variable_or_fixed_width)
.set_name("Fixed or Variable Width")
.add_int64_axis("num_rows", {1 * 1024 * 1024, 4 * 1024 * 1024})
.add_string_axis("direction", {"to row", "from row"})
.add_string_axis("strings", {"include strings", "no strings"});
.set_name("Fixed or Variable Width")
.add_int64_axis("num_rows", {1 * 1024 * 1024, 4 * 1024 * 1024})
.add_string_axis("direction", {"to row", "from row"})
.add_string_axis("strings", {"include strings", "no strings"});
74 changes: 37 additions & 37 deletions src/main/cpp/src/RowConversionJni.cpp
@@ -21,97 +21,97 @@
extern "C" {

JNIEXPORT jlongArray JNICALL
Java_com_nvidia_spark_rapids_jni_RowConversion_convertToRowsFixedWidthOptimized(JNIEnv* env,
jclass,
jlong input_table)
{
Java_com_nvidia_spark_rapids_jni_RowConversion_convertToRowsFixedWidthOptimized(
JNIEnv *env, jclass, jlong input_table) {
JNI_NULL_CHECK(env, input_table, "input table is null", 0);

try {
cudf::jni::auto_set_device(env);
cudf::table_view const* n_input_table = reinterpret_cast<cudf::table_view const*>(input_table);
cudf::table_view const *n_input_table =
reinterpret_cast<cudf::table_view const *>(input_table);
std::vector<std::unique_ptr<cudf::column>> cols =
spark_rapids_jni::convert_to_rows_fixed_width_optimized(*n_input_table);
cudf::jni::convert_to_rows_fixed_width_optimized(*n_input_table);
int const num_columns = cols.size();
cudf::jni::native_jlongArray outcol_handles(env, num_columns);
std::transform(cols.begin(), cols.end(), outcol_handles.begin(), [](auto& col) {
return cudf::jni::release_as_jlong(col);
});
std::transform(cols.begin(), cols.end(), outcol_handles.begin(),
[](auto &col) { return cudf::jni::release_as_jlong(col); });
return outcol_handles.get_jArray();
}
CATCH_STD(env, 0);
}

JNIEXPORT jlongArray JNICALL
Java_com_nvidia_spark_rapids_jni_RowConversion_convertToRows(JNIEnv* env, jclass, jlong input_table)
{
Java_com_nvidia_spark_rapids_jni_RowConversion_convertToRows(
JNIEnv *env, jclass, jlong input_table) {
JNI_NULL_CHECK(env, input_table, "input table is null", 0);

try {
cudf::jni::auto_set_device(env);
cudf::table_view const* n_input_table = reinterpret_cast<cudf::table_view const*>(input_table);
cudf::table_view const *n_input_table =
reinterpret_cast<cudf::table_view const *>(input_table);
std::vector<std::unique_ptr<cudf::column>> cols =
spark_rapids_jni::convert_to_rows(*n_input_table);
cudf::jni::convert_to_rows(*n_input_table);
int const num_columns = cols.size();
cudf::jni::native_jlongArray outcol_handles(env, num_columns);
std::transform(cols.begin(), cols.end(), outcol_handles.begin(), [](auto& col) {
return cudf::jni::release_as_jlong(col);
});
std::transform(cols.begin(), cols.end(), outcol_handles.begin(),
[](auto &col) { return cudf::jni::release_as_jlong(col); });
return outcol_handles.get_jArray();
}
CATCH_STD(env, 0);
}

JNIEXPORT jlongArray JNICALL
Java_com_nvidia_spark_rapids_jni_RowConversion_convertFromRowsFixedWidthOptimized(
JNIEnv* env, jclass, jlong input_column, jintArray types, jintArray scale)
{
JNIEnv *env, jclass, jlong input_column, jintArray types, jintArray scale) {
JNI_NULL_CHECK(env, input_column, "input column is null", 0);
JNI_NULL_CHECK(env, types, "types is null", 0);

try {
cudf::jni::auto_set_device(env);
cudf::lists_column_view const list_input{*reinterpret_cast<cudf::column_view*>(input_column)};
cudf::lists_column_view const list_input{
*reinterpret_cast<cudf::column_view *>(input_column)};
cudf::jni::native_jintArray n_types(env, types);
cudf::jni::native_jintArray n_scale(env, scale);
if (n_types.size() != n_scale.size()) {
JNI_THROW_NEW(env, cudf::jni::ILLEGAL_ARG_CLASS, "types and scales must match size", NULL);
JNI_THROW_NEW(env, cudf::jni::ILLEGAL_ARG_CLASS,
"types and scales must match size", NULL);
}
std::vector<cudf::data_type> types_vec;
std::transform(n_types.begin(),
n_types.end(),
n_scale.begin(),
std::back_inserter(types_vec),
[](jint type, jint scale) { return cudf::jni::make_data_type(type, scale); });
std::transform(n_types.begin(), n_types.end(), n_scale.begin(),
std::back_inserter(types_vec), [](jint type, jint scale) {
return cudf::jni::make_data_type(type, scale);
});
std::unique_ptr<cudf::table> result =
spark_rapids_jni::convert_from_rows_fixed_width_optimized(list_input, types_vec);
cudf::jni::convert_from_rows_fixed_width_optimized(list_input,
types_vec);
return cudf::jni::convert_table_for_return(env, result);
}
CATCH_STD(env, 0);
}

JNIEXPORT jlongArray JNICALL Java_com_nvidia_spark_rapids_jni_RowConversion_convertFromRows(
JNIEnv* env, jclass, jlong input_column, jintArray types, jintArray scale)
{
JNIEXPORT jlongArray JNICALL
Java_com_nvidia_spark_rapids_jni_RowConversion_convertFromRows(
JNIEnv *env, jclass, jlong input_column, jintArray types, jintArray scale) {
JNI_NULL_CHECK(env, input_column, "input column is null", 0);
JNI_NULL_CHECK(env, types, "types is null", 0);

try {
cudf::jni::auto_set_device(env);
cudf::lists_column_view const list_input{*reinterpret_cast<cudf::column_view*>(input_column)};
cudf::lists_column_view const list_input{
*reinterpret_cast<cudf::column_view *>(input_column)};
cudf::jni::native_jintArray n_types(env, types);
cudf::jni::native_jintArray n_scale(env, scale);
if (n_types.size() != n_scale.size()) {
JNI_THROW_NEW(env, cudf::jni::ILLEGAL_ARG_CLASS, "types and scales must match size", NULL);
JNI_THROW_NEW(env, cudf::jni::ILLEGAL_ARG_CLASS,
"types and scales must match size", NULL);
}
std::vector<cudf::data_type> types_vec;
std::transform(n_types.begin(),
n_types.end(),
n_scale.begin(),
std::back_inserter(types_vec),
[](jint type, jint scale) { return cudf::jni::make_data_type(type, scale); });
std::transform(n_types.begin(), n_types.end(), n_scale.begin(),
std::back_inserter(types_vec), [](jint type, jint scale) {
return cudf::jni::make_data_type(type, scale);
});
std::unique_ptr<cudf::table> result =
spark_rapids_jni::convert_from_rows(list_input, types_vec);
cudf::jni::convert_from_rows(list_input, types_vec);
return cudf::jni::convert_table_for_return(env, result);
}
CATCH_STD(env, 0);
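
Not part of the diff: a self-contained sketch of the handle-passing convention these wrappers follow, with the cudf::jni helper classes and error handling (JNI_NULL_CHECK, CATCH_STD) omitted for brevity. The Java class name com.example.RowConversionSketch is hypothetical, and <cudf/row_conversion.hpp> declaring cudf::convert_to_rows is an assumption; the reinterpret_cast/release pattern itself is taken from the wrappers above.

// Hypothetical JNI export illustrating the flow used above:
// jlong -> table_view*, convert, return each column as a raw pointer in a jlong.
#include <jni.h>

#include <cudf/column/column.hpp>
#include <cudf/row_conversion.hpp> // assumed header location
#include <cudf/table/table_view.hpp>

#include <cstddef>
#include <vector>

extern "C" JNIEXPORT jlongArray JNICALL
Java_com_example_RowConversionSketch_convertToRows(JNIEnv *env, jclass,
                                                   jlong input_table) {
  // The Java side passes the address of a cudf::table_view it owns.
  auto const *tview = reinterpret_cast<cudf::table_view const *>(input_table);
  auto cols = cudf::convert_to_rows(*tview);

  // Hand each column back as a raw pointer; ownership transfers to the Java
  // side, which must later delete the column through another native call.
  std::vector<jlong> raw(cols.size());
  for (std::size_t i = 0; i < cols.size(); ++i) {
    raw[i] = reinterpret_cast<jlong>(cols[i].release());
  }
  jlongArray handles = env->NewLongArray(static_cast<jsize>(raw.size()));
  env->SetLongArrayRegion(handles, 0, static_cast<jsize>(raw.size()), raw.data());
  return handles;
}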