Update JNI: switch row conversion to the cudf implementation and use CCCL

Signed-off-by: Nghia Truong <[email protected]>
NVIDIA · Dec 21, 2023 · a87f3e4
1 parent 98dc423
commit a87f3e4
Show file tree

Hide file tree

Showing 7 changed files with 82 additions and 3,794 deletions.
diff --git a/src/main/cpp/CMakeLists.txt b/src/main/cpp/CMakeLists.txt
@@ -94,11 +94,8 @@ include(cmake/Modules/ConfigureCUDA.cmake) # set other CUDA compilation flags
 # ##################################################################################################
 # * dependencies ----------------------------------------------------------------------------------
 
-# find libcu++
-include(${rapids-cmake-dir}/cpm/libcudacxx.cmake)
-
-# find thrust/cub
-include(${CUDF_DIR}/cpp/cmake/thirdparty/get_thrust.cmake)
+# find CCCL
+include(${CUDF_DIR}/cpp/cmake/thirdparty/get_cccl.cmake)
 
 # JNI
 find_package(JNI REQUIRED)
@@ -174,7 +171,6 @@ add_library(
   src/map_utils.cu
   src/murmur_hash.cu
   src/parse_uri.cu
-  src/row_conversion.cu
   src/timezones.cu
   src/utilities.cu
   src/xxhash64.cu

diff --git a/src/main/cpp/benchmarks/row_conversion.cpp b/src/main/cpp/benchmarks/row_conversion.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,21 +24,16 @@
 #include <cudf/strings/strings_column_view.hpp>
 #include <cudf_test/column_utilities.hpp>
 
-void fixed_width(nvbench::state& state)
-{
+void fixed_width(nvbench::state &state) {
   cudf::size_type const n_rows{(cudf::size_type)state.get_int64("num_rows")};
   auto const direction = state.get_string("direction");
-  auto const table     = create_random_table(cycle_dtypes({cudf::type_id::INT8,
-                                                           cudf::type_id::INT32,
-                                                           cudf::type_id::INT16,
-                                                           cudf::type_id::INT64,
-                                                           cudf::type_id::INT32,
-                                                           cudf::type_id::BOOL8,
-                                                           cudf::type_id::UINT16,
-                                                           cudf::type_id::UINT8,
-                                                           cudf::type_id::UINT64},
-                                                      212),
-                                         row_count{n_rows});
+  auto const table = create_random_table(
+      cycle_dtypes(
+          {cudf::type_id::INT8, cudf::type_id::INT32, cudf::type_id::INT16,
+           cudf::type_id::INT64, cudf::type_id::INT32, cudf::type_id::BOOL8,
+           cudf::type_id::UINT16, cudf::type_id::UINT8, cudf::type_id::UINT64},
+          212),
+      row_count{n_rows});
 
   std::vector<cudf::data_type> schema;
   cudf::size_type bytes_per_row = 0;
@@ -48,15 +43,15 @@ void fixed_width(nvbench::state& state)
     bytes_per_row += cudf::size_of(t);
   }
 
-  auto rows = spark_rapids_jni::convert_to_rows_fixed_width_optimized(table->view());
+  auto rows = cudf::convert_to_rows_fixed_width_optimized(table->view());
 
-  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch &launch) {
     if (direction == "to row") {
-      auto _rows = spark_rapids_jni::convert_to_rows_fixed_width_optimized(table->view());
+      auto _rows = cudf::convert_to_rows_fixed_width_optimized(table->view());
     } else {
-      for (auto const& r : rows) {
+      for (auto const &r : rows) {
         cudf::lists_column_view const l(r->view());
-        auto out = spark_rapids_jni::convert_from_rows_fixed_width_optimized(l, schema);
+        auto out = cudf::convert_from_rows_fixed_width_optimized(l, schema);
       }
     }
   });
@@ -65,47 +60,38 @@ void fixed_width(nvbench::state& state)
   state.add_global_memory_reads<int64_t>(bytes_per_row * table->num_rows());
 }
 
-static void variable_or_fixed_width(nvbench::state& state)
-{
+static void variable_or_fixed_width(nvbench::state &state) {
   cudf::size_type const n_rows{(cudf::size_type)state.get_int64("num_rows")};
-  auto const direction       = state.get_string("direction");
+  auto const direction = state.get_string("direction");
   auto const include_strings = state.get_string("strings");
 
   if (n_rows > 1 * 1024 * 1024 && include_strings == "include strings") {
     state.skip("Too many rows for strings will cause memory issues");
     return;
   }
 
-  std::vector<cudf::type_id> const table_types = [&]() -> std::vector<cudf::type_id> {
+  std::vector<cudf::type_id> const table_types =
+      [&]() -> std::vector<cudf::type_id> {
     if (include_strings == "include strings") {
-      return {cudf::type_id::INT8,
-              cudf::type_id::INT32,
-              cudf::type_id::INT16,
-              cudf::type_id::INT64,
-              cudf::type_id::INT32,
-              cudf::type_id::BOOL8,
-              cudf::type_id::STRING,
-              cudf::type_id::UINT16,
-              cudf::type_id::UINT8,
-              cudf::type_id::UINT64};
+      return {cudf::type_id::INT8,   cudf::type_id::INT32,
+              cudf::type_id::INT16,  cudf::type_id::INT64,
+              cudf::type_id::INT32,  cudf::type_id::BOOL8,
+              cudf::type_id::STRING, cudf::type_id::UINT16,
+              cudf::type_id::UINT8,  cudf::type_id::UINT64};
     } else {
-      return {cudf::type_id::INT8,
-              cudf::type_id::INT32,
-              cudf::type_id::INT16,
-              cudf::type_id::INT64,
-              cudf::type_id::INT32,
-              cudf::type_id::BOOL8,
-              cudf::type_id::UINT16,
-              cudf::type_id::UINT8,
-              cudf::type_id::UINT64};
+      return {
+          cudf::type_id::INT8,   cudf::type_id::INT32, cudf::type_id::INT16,
+          cudf::type_id::INT64,  cudf::type_id::INT32, cudf::type_id::BOOL8,
+          cudf::type_id::UINT16, cudf::type_id::UINT8, cudf::type_id::UINT64};
     }
   }();
 
-  auto const table = create_random_table(cycle_dtypes(table_types, 155), row_count{n_rows});
+  auto const table =
+      create_random_table(cycle_dtypes(table_types, 155), row_count{n_rows});
 
   std::vector<cudf::data_type> schema;
   cudf::size_type bytes_per_row = 0;
-  cudf::size_type string_bytes  = 0;
+  cudf::size_type string_bytes = 0;
   for (int i = 0; i < table->num_columns(); ++i) {
     auto t = table->get_column(i).type();
     schema.push_back(t);
@@ -117,16 +103,16 @@ static void variable_or_fixed_width(nvbench::state& state)
     }
   }
 
-  auto rows = spark_rapids_jni::convert_to_rows(table->view());
+  auto rows = cudf::convert_to_rows(table->view());
 
-  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
-    auto new_rows = spark_rapids_jni::convert_to_rows(table->view());
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch &launch) {
+    auto new_rows = cudf::convert_to_rows(table->view());
     if (direction == "to row") {
-      auto _rows = spark_rapids_jni::convert_to_rows(table->view());
+      auto _rows = cudf::convert_to_rows(table->view());
     } else {
-      for (auto const& r : rows) {
+      for (auto const &r : rows) {
         cudf::lists_column_view const l(r->view());
-        auto out = spark_rapids_jni::convert_from_rows(l, schema);
+        auto out = cudf::convert_from_rows(l, schema);
       }
     }
   });
@@ -136,12 +122,12 @@ static void variable_or_fixed_width(nvbench::state& state)
 }
 
 NVBENCH_BENCH(fixed_width)
-  .set_name("Fixed Width Only")
-  .add_int64_axis("num_rows", {1 * 1024 * 1024, 4 * 1024 * 1024})
-  .add_string_axis("direction", {"to row", "from row"});
+    .set_name("Fixed Width Only")
+    .add_int64_axis("num_rows", {1 * 1024 * 1024, 4 * 1024 * 1024})
+    .add_string_axis("direction", {"to row", "from row"});
 
 NVBENCH_BENCH(variable_or_fixed_width)
-  .set_name("Fixed or Variable Width")
-  .add_int64_axis("num_rows", {1 * 1024 * 1024, 4 * 1024 * 1024})
-  .add_string_axis("direction", {"to row", "from row"})
-  .add_string_axis("strings", {"include strings", "no strings"});
+    .set_name("Fixed or Variable Width")
+    .add_int64_axis("num_rows", {1 * 1024 * 1024, 4 * 1024 * 1024})
+    .add_string_axis("direction", {"to row", "from row"})
+    .add_string_axis("strings", {"include strings", "no strings"});
diff --git a/src/main/cpp/src/RowConversionJni.cpp b/src/main/cpp/src/RowConversionJni.cpp
@@ -21,97 +21,97 @@
 extern "C" {
 
 JNIEXPORT jlongArray JNICALL
-Java_com_nvidia_spark_rapids_jni_RowConversion_convertToRowsFixedWidthOptimized(JNIEnv* env,
-                                                                                jclass,
-                                                                                jlong input_table)
-{
+Java_com_nvidia_spark_rapids_jni_RowConversion_convertToRowsFixedWidthOptimized(
+    JNIEnv *env, jclass, jlong input_table) {
   JNI_NULL_CHECK(env, input_table, "input table is null", 0);
 
   try {
     cudf::jni::auto_set_device(env);
-    cudf::table_view const* n_input_table = reinterpret_cast<cudf::table_view const*>(input_table);
+    cudf::table_view const *n_input_table =
+        reinterpret_cast<cudf::table_view const *>(input_table);
     std::vector<std::unique_ptr<cudf::column>> cols =
-      spark_rapids_jni::convert_to_rows_fixed_width_optimized(*n_input_table);
+        cudf::jni::convert_to_rows_fixed_width_optimized(*n_input_table);
     int const num_columns = cols.size();
     cudf::jni::native_jlongArray outcol_handles(env, num_columns);
-    std::transform(cols.begin(), cols.end(), outcol_handles.begin(), [](auto& col) {
-      return cudf::jni::release_as_jlong(col);
-    });
+    std::transform(cols.begin(), cols.end(), outcol_handles.begin(),
+                   [](auto &col) { return cudf::jni::release_as_jlong(col); });
     return outcol_handles.get_jArray();
   }
   CATCH_STD(env, 0);
 }
 
 JNIEXPORT jlongArray JNICALL
-Java_com_nvidia_spark_rapids_jni_RowConversion_convertToRows(JNIEnv* env, jclass, jlong input_table)
-{
+Java_com_nvidia_spark_rapids_jni_RowConversion_convertToRows(
+    JNIEnv *env, jclass, jlong input_table) {
   JNI_NULL_CHECK(env, input_table, "input table is null", 0);
 
   try {
     cudf::jni::auto_set_device(env);
-    cudf::table_view const* n_input_table = reinterpret_cast<cudf::table_view const*>(input_table);
+    cudf::table_view const *n_input_table =
+        reinterpret_cast<cudf::table_view const *>(input_table);
     std::vector<std::unique_ptr<cudf::column>> cols =
-      spark_rapids_jni::convert_to_rows(*n_input_table);
+        cudf::jni::convert_to_rows(*n_input_table);
     int const num_columns = cols.size();
     cudf::jni::native_jlongArray outcol_handles(env, num_columns);
-    std::transform(cols.begin(), cols.end(), outcol_handles.begin(), [](auto& col) {
-      return cudf::jni::release_as_jlong(col);
-    });
+    std::transform(cols.begin(), cols.end(), outcol_handles.begin(),
+                   [](auto &col) { return cudf::jni::release_as_jlong(col); });
     return outcol_handles.get_jArray();
   }
   CATCH_STD(env, 0);
 }
 
 JNIEXPORT jlongArray JNICALL
 Java_com_nvidia_spark_rapids_jni_RowConversion_convertFromRowsFixedWidthOptimized(
-  JNIEnv* env, jclass, jlong input_column, jintArray types, jintArray scale)
-{
+    JNIEnv *env, jclass, jlong input_column, jintArray types, jintArray scale) {
   JNI_NULL_CHECK(env, input_column, "input column is null", 0);
   JNI_NULL_CHECK(env, types, "types is null", 0);
 
   try {
     cudf::jni::auto_set_device(env);
-    cudf::lists_column_view const list_input{*reinterpret_cast<cudf::column_view*>(input_column)};
+    cudf::lists_column_view const list_input{
+        *reinterpret_cast<cudf::column_view *>(input_column)};
     cudf::jni::native_jintArray n_types(env, types);
     cudf::jni::native_jintArray n_scale(env, scale);
     if (n_types.size() != n_scale.size()) {
-      JNI_THROW_NEW(env, cudf::jni::ILLEGAL_ARG_CLASS, "types and scales must match size", NULL);
+      JNI_THROW_NEW(env, cudf::jni::ILLEGAL_ARG_CLASS,
+                    "types and scales must match size", NULL);
     }
     std::vector<cudf::data_type> types_vec;
-    std::transform(n_types.begin(),
-                   n_types.end(),
-                   n_scale.begin(),
-                   std::back_inserter(types_vec),
-                   [](jint type, jint scale) { return cudf::jni::make_data_type(type, scale); });
+    std::transform(n_types.begin(), n_types.end(), n_scale.begin(),
+                   std::back_inserter(types_vec), [](jint type, jint scale) {
+                     return cudf::jni::make_data_type(type, scale);
+                   });
     std::unique_ptr<cudf::table> result =
-      spark_rapids_jni::convert_from_rows_fixed_width_optimized(list_input, types_vec);
+        cudf::jni::convert_from_rows_fixed_width_optimized(list_input,
+                                                           types_vec);
     return cudf::jni::convert_table_for_return(env, result);
   }
   CATCH_STD(env, 0);
 }
 
-JNIEXPORT jlongArray JNICALL Java_com_nvidia_spark_rapids_jni_RowConversion_convertFromRows(
-  JNIEnv* env, jclass, jlong input_column, jintArray types, jintArray scale)
-{
+JNIEXPORT jlongArray JNICALL
+Java_com_nvidia_spark_rapids_jni_RowConversion_convertFromRows(
+    JNIEnv *env, jclass, jlong input_column, jintArray types, jintArray scale) {
   JNI_NULL_CHECK(env, input_column, "input column is null", 0);
   JNI_NULL_CHECK(env, types, "types is null", 0);
 
   try {
     cudf::jni::auto_set_device(env);
-    cudf::lists_column_view const list_input{*reinterpret_cast<cudf::column_view*>(input_column)};
+    cudf::lists_column_view const list_input{
+        *reinterpret_cast<cudf::column_view *>(input_column)};
     cudf::jni::native_jintArray n_types(env, types);
     cudf::jni::native_jintArray n_scale(env, scale);
     if (n_types.size() != n_scale.size()) {
-      JNI_THROW_NEW(env, cudf::jni::ILLEGAL_ARG_CLASS, "types and scales must match size", NULL);
+      JNI_THROW_NEW(env, cudf::jni::ILLEGAL_ARG_CLASS,
+                    "types and scales must match size", NULL);
     }
     std::vector<cudf::data_type> types_vec;
-    std::transform(n_types.begin(),
-                   n_types.end(),
-                   n_scale.begin(),
-                   std::back_inserter(types_vec),
-                   [](jint type, jint scale) { return cudf::jni::make_data_type(type, scale); });
+    std::transform(n_types.begin(), n_types.end(), n_scale.begin(),
+                   std::back_inserter(types_vec), [](jint type, jint scale) {
+                     return cudf::jni::make_data_type(type, scale);
+                   });
     std::unique_ptr<cudf::table> result =
-      spark_rapids_jni::convert_from_rows(list_input, types_vec);
+        cudf::jni::convert_from_rows(list_input, types_vec);
     return cudf::jni::convert_table_for_return(env, result);
   }
   CATCH_STD(env, 0);