Update NVBench fixture to use new hooks, fix pinned memory segfault. (#…

…15492) NVBench recently exposed new hooks for modifying its `main` implementation. Updated cudf to use these. Also noticed that the host pinned-pool memory resource option caused the test to segfault, since the function-scope static holding the pool outlived the CUDA context. Refactored the fixture a bit to ensure that the pool is destroyed before the context. Note that this currently overrides the rapids-cmake version for NVBench. Rapids-cmake should be updated and the override removed before this is merged (ping @robertmaynard). cc: @jrhemstad @davidwendt Authors: - Allison Piper (https://github.com/alliepiper) - David Wendt (https://github.com/davidwendt) Approvers: - David Wendt (https://github.com/davidwendt) - Paul Mattione (https://github.com/pmattione-nvidia) - Robert Maynard (https://github.com/robertmaynard) URL: #15492
rapidsai · Apr 23, 2024 · e6d9b9f · e6d9b9f
1 parent b16e5c2
commit e6d9b9f
Show file tree

Hide file tree

Showing 4 changed files with 49 additions and 57 deletions.
diff --git a/cpp/benchmarks/fixture/nvbench_fixture.hpp b/cpp/benchmarks/fixture/nvbench_fixture.hpp
@@ -45,6 +45,8 @@ static std::string cuio_host_mem_param{
  * Initializes the default memory resource to use the RMM pool device resource.
  */
 struct nvbench_base_fixture {
+  using host_pooled_mr_t = rmm::mr::pool_memory_resource<rmm::mr::pinned_host_memory_resource>;
+
   inline auto make_cuda() { return std::make_shared<rmm::mr::cuda_memory_resource>(); }
 
   inline auto make_pool()
@@ -90,12 +92,14 @@ struct nvbench_base_fixture {
 
   inline rmm::host_async_resource_ref make_cuio_host_pinned_pool()
   {
-    using host_pooled_mr = rmm::mr::pool_memory_resource<rmm::mr::pinned_host_memory_resource>;
-    static std::shared_ptr<host_pooled_mr> mr = std::make_shared<host_pooled_mr>(
-      std::make_shared<rmm::mr::pinned_host_memory_resource>().get(),
-      size_t{1} * 1024 * 1024 * 1024);
+    if (!this->host_pooled_mr) {
+      // Don't store in static, as the CUDA context may be destroyed before static destruction
+      this->host_pooled_mr = std::make_shared<host_pooled_mr_t>(
+        std::make_shared<rmm::mr::pinned_host_memory_resource>().get(),
+        size_t{1} * 1024 * 1024 * 1024);
+    }
 
-    return *mr;
+    return *this->host_pooled_mr;
   }
 
   inline rmm::host_async_resource_ref create_cuio_host_memory_resource(std::string const& mode)
@@ -126,9 +130,16 @@ struct nvbench_base_fixture {
     std::cout << "CUIO host memory resource = " << cuio_host_mode << "\n";
   }
 
+  ~nvbench_base_fixture()
+  {
+    // Ensure the the pool is freed before the CUDA context is destroyed:
+    cudf::io::set_host_memory_resource(this->make_cuio_host_pinned());
+  }
+
   std::shared_ptr<rmm::mr::device_memory_resource> mr;
   std::string rmm_mode{"pool"};
 
+  std::shared_ptr<host_pooled_mr_t> host_pooled_mr;
   std::string cuio_host_mode{"pinned"};
 };
 

diff --git a/cpp/benchmarks/fixture/nvbench_main.cpp b/cpp/benchmarks/fixture/nvbench_main.cpp
@@ -15,29 +15,44 @@
  */
 
 #include <benchmarks/fixture/nvbench_fixture.hpp>
-#define NVBENCH_ENVIRONMENT cudf::nvbench_base_fixture
 
 #include <nvbench/main.cuh>
 
+#include <string>
 #include <vector>
 
+namespace cudf {
+
 // strip off the rmm_mode and cuio_host_mem parameters before passing the
 // remaining arguments to nvbench::option_parser
-#undef NVBENCH_MAIN_PARSE
-#define NVBENCH_MAIN_PARSE(argc, argv)                     \
-  nvbench::option_parser parser;                           \
-  std::vector<std::string> m_args;                         \
-  for (int i = 0; i < argc; ++i) {                         \
-    std::string arg = argv[i];                             \
-    if (arg == cudf::detail::rmm_mode_param) {             \
-      i += 2;                                              \
-    } else if (arg == cudf::detail::cuio_host_mem_param) { \
-      i += 2;                                              \
-    } else {                                               \
-      m_args.push_back(arg);                               \
-    }                                                      \
-  }                                                        \
-  parser.parse(m_args)
+void benchmark_arg_handler(std::vector<std::string>& args)
+{
+  std::vector<std::string> _cudf_tmp_args;
+
+  for (std::size_t i = 0; i < args.size(); ++i) {
+    std::string arg = args[i];
+    if (arg == cudf::detail::rmm_mode_param) {
+      i++;  // skip the next argument
+    } else if (arg == cudf::detail::cuio_host_mem_param) {
+      i++;  // skip the next argument
+    } else {
+      _cudf_tmp_args.push_back(arg);
+    }
+  }
+
+  args = _cudf_tmp_args;
+}
+
+}  // namespace cudf
+
+// Install arg handler
+#undef NVBENCH_MAIN_CUSTOM_ARGS_HANDLER
+#define NVBENCH_MAIN_CUSTOM_ARGS_HANDLER(args) cudf::benchmark_arg_handler(args)
+
+// Global fixture setup:
+#undef NVBENCH_MAIN_INITIALIZE_CUSTOM_POST
+#define NVBENCH_MAIN_INITIALIZE_CUSTOM_POST(argc, argv) \
+  [[maybe_unused]] auto env_state = cudf::nvbench_base_fixture(argc, argv);
 
 // this declares/defines the main() function using the definitions above
 NVBENCH_MAIN
diff --git a/cpp/cmake/thirdparty/patches/nvbench_global_setup.diff b/cpp/cmake/thirdparty/patches/nvbench_global_setup.diff
diff --git a/cpp/cmake/thirdparty/patches/nvbench_override.json b/cpp/cmake/thirdparty/patches/nvbench_override.json
@@ -2,13 +2,8 @@
 {
   "packages" : {
     "nvbench" : {
-      "patches" : [
-        {
-          "file" : "${current_json_dir}/nvbench_global_setup.diff",
-          "issue" : "Fix add support for global setup to initialize RMM in nvbench [https://github.com/NVIDIA/nvbench/pull/123]",
-          "fixed_in" : ""
-        }
-      ]
+      "git_url": "https://github.com/NVIDIA/nvbench.git",
+      "git_tag": "555d628e9b250868c9da003e4407087ff1982e8e"
     }
   }
 }