Merge branch 'branch-23.12' into bf_batch_query
Showing 14 changed files with 524 additions and 76 deletions.
@@ -0,0 +1,132 @@
/*
 * Copyright (c) 2023, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#pragma once

#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>
#include <sys/mman.h>

#include <cstddef>
#include <cstring>

namespace raft::mr {
/**
 * @brief `device_memory_resource` derived class that uses mmap to allocate memory.
 * This class enables memory allocation using huge pages.
 * It is assumed that the allocated memory is directly accessible on device. This currently only
 * works on GH (Grace Hopper) systems.
 *
 * TODO(tfeher): consider improving or removing this helper once we have made progress with
 * https://github.com/rapidsai/raft/issues/1819
 */
class cuda_huge_page_resource final : public rmm::mr::device_memory_resource {
 public:
  cuda_huge_page_resource()                                          = default;
  ~cuda_huge_page_resource() override                                = default;
  cuda_huge_page_resource(cuda_huge_page_resource const&)            = default;
  cuda_huge_page_resource(cuda_huge_page_resource&&)                 = default;
  cuda_huge_page_resource& operator=(cuda_huge_page_resource const&) = default;
  cuda_huge_page_resource& operator=(cuda_huge_page_resource&&)      = default;

  /**
   * @brief Query whether the resource supports use of non-null CUDA streams for
   * allocation/deallocation. `cuda_huge_page_resource` does not support streams.
   *
   * @returns bool false
   */
  [[nodiscard]] bool supports_streams() const noexcept override { return false; }

  /**
   * @brief Query whether the resource supports the get_mem_info API.
   *
   * @return true
   */
  [[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; }

 private:
  /**
   * @brief Allocates memory of size at least `bytes` using mmap, with transparent huge pages
   * requested via `madvise(MADV_HUGEPAGE)`.
   *
   * The returned pointer is page-aligned.
   *
   * @note Stream argument is ignored
   *
   * @throws `raft::logic_error` if the requested allocation could not be fulfilled
   *
   * @param bytes The size, in bytes, of the allocation
   * @return void* Pointer to the newly allocated memory
   */
  void* do_allocate(std::size_t bytes, rmm::cuda_stream_view) override
  {
    void* _addr{nullptr};
    _addr = mmap(nullptr, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (_addr == MAP_FAILED) { RAFT_FAIL("huge_page_resource::MAP FAILED"); }
    if (madvise(_addr, bytes, MADV_HUGEPAGE) == -1) {
      munmap(_addr, bytes);
      RAFT_FAIL("huge_page_resource::madvise MADV_HUGEPAGE");
    }
    // Touch the mapping so it is backed by (huge) pages.
    memset(_addr, 0, bytes);
    return _addr;
  }

  /**
   * @brief Deallocate memory pointed to by \p ptr.
   *
   * @note Stream argument is ignored.
   *
   * @throws Nothing.
   *
   * @param ptr Pointer to be deallocated
   * @param size Size, in bytes, of the allocation
   */
  void do_deallocate(void* ptr, std::size_t size, rmm::cuda_stream_view) override
  {
    if (munmap(ptr, size) == -1) { RAFT_FAIL("huge_page_resource::munmap"); }
  }

  /**
   * @brief Compare this resource to another.
   *
   * Two cuda_huge_page_resources always compare equal, because they can each
   * deallocate memory allocated by the other.
   *
   * @throws Nothing.
   *
   * @param other The other resource to compare to
   * @return true If the two resources are equivalent
   * @return false If the two resources are not equal
   */
  [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
  {
    return dynamic_cast<cuda_huge_page_resource const*>(&other) != nullptr;
  }

  /**
   * @brief Get free and available memory for memory resource
   *
   * @throws `rmm::cuda_error` if unable to retrieve memory info.
   *
   * @return std::pair containing free_size and total_size of memory
   */
  [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
    rmm::cuda_stream_view) const override
  {
    std::size_t free_size{};
    std::size_t total_size{};
    RMM_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size));
    return std::make_pair(free_size, total_size);
  }
};
}  // namespace raft::mr
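
For context, a minimal usage sketch of `cuda_huge_page_resource`. The include path, buffer size, and variable names below are illustrative assumptions; the public `allocate`/`deallocate` calls come from the `rmm::mr::device_memory_resource` base class, and the stream argument is ignored by this resource.

#include <rmm/cuda_stream_view.hpp>
// #include "cuda_huge_page_resource.hpp"  // assumed include path for the header above

int main()
{
  raft::mr::cuda_huge_page_resource huge_page_mr;
  const std::size_t n_bytes = std::size_t{1} << 30;  // 1 GiB, illustrative size
  // Streams are ignored by this resource, so a default stream view is fine.
  void* buf = huge_page_mr.allocate(n_bytes, rmm::cuda_stream_view{});
  // ... use buf; per the class docs, the same pointer is directly usable on device on GH systems ...
  huge_page_mr.deallocate(buf, n_bytes, rmm::cuda_stream_view{});
  return 0;
}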
@@ -0,0 +1,130 @@
/*
 * Copyright (c) 2023, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#pragma once

#include <rmm/mr/device/device_memory_resource.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/error.hpp>

#include <cstddef>

namespace raft::mr {
/**
 * @brief `device_memory_resource` derived class that uses cudaMallocHost/cudaFreeHost for
 * allocation/deallocation.
 *
 * This is almost the same as rmm::mr::host::pinned_memory_resource, but it has
 * device_memory_resource as its base class. Pinned memory can be accessed from device,
 * and using this allocator we can create a device_mdarray backed by pinned memory.
 *
 * TODO(tfeher): it would be preferable to just rely on the existing allocator from rmm
 * (pinned_memory_resource), but that is incompatible with the container_policy class
 * for device matrix, because the latter expects a device_memory_resource. We shall
 * revise this once we progress with Issue https://github.com/rapidsai/raft/issues/1819
 */
class cuda_pinned_resource final : public rmm::mr::device_memory_resource {
 public:
  cuda_pinned_resource()                                       = default;
  ~cuda_pinned_resource() override                             = default;
  cuda_pinned_resource(cuda_pinned_resource const&)            = default;
  cuda_pinned_resource(cuda_pinned_resource&&)                 = default;
  cuda_pinned_resource& operator=(cuda_pinned_resource const&) = default;
  cuda_pinned_resource& operator=(cuda_pinned_resource&&)      = default;

  /**
   * @brief Query whether the resource supports use of non-null CUDA streams for
   * allocation/deallocation. `cuda_pinned_resource` does not support streams.
   *
   * @returns bool false
   */
  [[nodiscard]] bool supports_streams() const noexcept override { return false; }

  /**
   * @brief Query whether the resource supports the get_mem_info API.
   *
   * @return true
   */
  [[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; }

 private:
  /**
   * @brief Allocates memory of size at least `bytes` using cudaMallocHost.
   *
   * The returned pointer has at least 256B alignment.
   *
   * @note Stream argument is ignored
   *
   * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled
   *
   * @param bytes The size, in bytes, of the allocation
   * @return void* Pointer to the newly allocated memory
   */
  void* do_allocate(std::size_t bytes, rmm::cuda_stream_view) override
  {
    void* ptr{nullptr};
    RMM_CUDA_TRY_ALLOC(cudaMallocHost(&ptr, bytes));
    return ptr;
  }

  /**
   * @brief Deallocate memory pointed to by \p ptr.
   *
   * @note Stream argument is ignored.
   *
   * @throws Nothing.
   *
   * @param ptr Pointer to be deallocated
   */
  void do_deallocate(void* ptr, std::size_t, rmm::cuda_stream_view) override
  {
    RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr));
  }

  /**
   * @brief Compare this resource to another.
   *
   * Two cuda_pinned_resources always compare equal, because they can each
   * deallocate memory allocated by the other.
   *
   * @throws Nothing.
   *
   * @param other The other resource to compare to
   * @return true If the two resources are equivalent
   * @return false If the two resources are not equal
   */
  [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
  {
    return dynamic_cast<cuda_pinned_resource const*>(&other) != nullptr;
  }

  /**
   * @brief Get free and available memory for memory resource
   *
   * @throws `rmm::cuda_error` if unable to retrieve memory info.
   *
   * @return std::pair containing free_size and total_size of memory
   */
  [[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
    rmm::cuda_stream_view) const override
  {
    std::size_t free_size{};
    std::size_t total_size{};
    RMM_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size));
    return std::make_pair(free_size, total_size);
  }
};
}  // namespace raft::mr
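
A rough sketch of the use case described in the class comment: because `cuda_pinned_resource` presents itself as a `device_memory_resource`, it can be handed to containers that expect one, for example `rmm::device_buffer`. The include path, buffer size, and variable names are illustrative; treating `device_buffer` as a stand-in for the device-matrix container policy mentioned above is an assumption for demonstration only.

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
// #include "cuda_pinned_resource.hpp"  // assumed include path for the header above

int main()
{
  raft::mr::cuda_pinned_resource pinned_mr;
  rmm::cuda_stream_view stream;  // stream argument is ignored by this resource
  // 64 MiB buffer whose backing storage is pinned host memory, accessible from device code.
  rmm::device_buffer buf(64u * 1024u * 1024u, stream, &pinned_mr);
  // ... pass buf.data() to kernels, or use the resource to back a device_mdarray ...
  return 0;
}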