Forward-merge branch-23.12 to branch-24.02 #2004

Merged: 1 commit, Nov 16, 2023
20 changes: 15 additions & 5 deletions cpp/bench/ann/src/common/benchmark.hpp
@@ -21,6 +21,7 @@
 #include "util.hpp"
 
 #include <benchmark/benchmark.h>
+#include <raft/core/logger.hpp>
 
 #include <algorithm>
 #include <atomic>
@@ -131,7 +132,7 @@ void bench_build(::benchmark::State& state,
       log_info("Overwriting file: %s", index.file.c_str());
     } else {
       return state.SkipWithMessage(
-        "Index file already exists (use --overwrite to overwrite the index).");
+        "Index file already exists (use --force to overwrite the index).");
     }
   }
 
@@ -380,7 +381,7 @@ inline void printf_usage()
   ::benchmark::PrintDefaultHelp();
   fprintf(stdout,
           " [--build|--search] \n"
-          " [--overwrite]\n"
+          " [--force]\n"
           " [--data_prefix=<prefix>]\n"
           " [--index_prefix=<prefix>]\n"
           " [--override_kv=<key:value1:value2:...:valueN>]\n"
@@ -392,7 +393,7 @@ inline void printf_usage()
           " --build: build mode, will build index\n"
           " --search: search mode, will search using the built index\n"
           " one and only one of --build and --search should be specified\n"
-          " --overwrite: force overwriting existing index files\n"
+          " --force: force overwriting existing index files\n"
           " --data_prefix=<prefix>:"
           " prepend <prefix> to dataset file paths specified in the <conf>.json (default = "
           "'data/').\n"
@@ -572,6 +573,8 @@ inline auto run_main(int argc, char** argv) -> int
   std::string mode = "latency";
   std::string threads_arg_txt = "";
   std::vector<int> threads = {1, -1}; // min_thread, max_thread
+  std::string log_level_str = "";
+  int raft_log_level = raft::logger::get(RAFT_NAME).get_level();
   kv_series override_kv{};
 
   char arg0_default[] = "benchmark"; // NOLINT
@@ -589,14 +592,19 @@ inline auto run_main(int argc, char** argv) -> int
   std::ifstream conf_stream(conf_path);
 
   for (int i = 1; i < argc; i++) {
-    if (parse_bool_flag(argv[i], "--overwrite", force_overwrite) ||
+    if (parse_bool_flag(argv[i], "--force", force_overwrite) ||
         parse_bool_flag(argv[i], "--build", build_mode) ||
         parse_bool_flag(argv[i], "--search", search_mode) ||
         parse_string_flag(argv[i], "--data_prefix", data_prefix) ||
         parse_string_flag(argv[i], "--index_prefix", index_prefix) ||
         parse_string_flag(argv[i], "--mode", mode) ||
         parse_string_flag(argv[i], "--override_kv", new_override_kv) ||
-        parse_string_flag(argv[i], "--threads", threads_arg_txt)) {
+        parse_string_flag(argv[i], "--threads", threads_arg_txt) ||
+        parse_string_flag(argv[i], "--raft_log_level", log_level_str)) {
+      if (!log_level_str.empty()) {
+        raft_log_level = std::stoi(log_level_str);
+        log_level_str = "";
+      }
       if (!threads_arg_txt.empty()) {
         auto threads_arg = split(threads_arg_txt, ':');
         threads[0] = std::stoi(threads_arg[0]);
@@ -625,6 +633,8 @@ inline auto run_main(int argc, char** argv) -> int
     }
   }
 
+  raft::logger::get(RAFT_NAME).set_level(raft_log_level);
+
   Objective metric_objective = Objective::LATENCY;
   if (mode == "throughput") { metric_objective = Objective::THROUGHPUT; }
 
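To make the new flags concrete, here is a minimal, self-contained sketch of the parsing pattern the loop above relies on. parse_bool_flag and parse_string_flag are re-implemented from their observed usage (the real definitions live in util.hpp and may differ), and the numeric log levels are assumed to follow RAFT's logger convention (0 = off up to 6 = trace):

// Sketch only: flag helpers inferred from usage; not the util.hpp originals.
#include <cstring>
#include <iostream>
#include <string>

namespace {
bool parse_bool_flag(const char* arg, const char* pat, bool& result)
{
  if (std::strcmp(arg, pat) == 0) {
    result = true;
    return true;
  }
  return false;
}

bool parse_string_flag(const char* arg, const char* pat, std::string& result)
{
  auto n = std::strlen(pat);
  // accept "--flag=value"
  if (std::strncmp(arg, pat, n) == 0 && arg[n] == '=') {
    result = arg + n + 1;
    return true;
  }
  return false;
}
}  // namespace

int main(int argc, char** argv)
{
  bool force_overwrite = false;
  std::string log_level_str;
  int raft_log_level = 2;  // placeholder default; the real code queries the logger
  for (int i = 1; i < argc; i++) {
    if (parse_bool_flag(argv[i], "--force", force_overwrite) ||
        parse_string_flag(argv[i], "--raft_log_level", log_level_str)) {
      if (!log_level_str.empty()) {
        raft_log_level = std::stoi(log_level_str);  // e.g. --raft_log_level=4
        log_level_str.clear();
      }
    }
  }
  std::cout << "force=" << force_overwrite << " raft_log_level=" << raft_log_level << '\n';
}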
132 changes: 132 additions & 0 deletions cpp/bench/ann/src/common/cuda_huge_page_resource.hpp
@@ -0,0 +1,132 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>
#include <sys/mman.h>

#include <cstddef>
#include <cstring>  // for memset in do_allocate

namespace raft::mr {
/**
* @brief `device_memory_resource` derived class that uses mmap to allocate memory.
* This class enables memory allocation using huge pages.
* It is assumed that the allocated memory is directly accessible on device; this currently
* only works on Grace Hopper (GH) systems.
*
* TODO(tfeher): consider improving or removing this helper once we make progress with
* https://github.com/rapidsai/raft/issues/1819
*/
class cuda_huge_page_resource final : public rmm::mr::device_memory_resource {
public:
cuda_huge_page_resource() = default;
~cuda_huge_page_resource() override = default;
cuda_huge_page_resource(cuda_huge_page_resource const&) = default;
cuda_huge_page_resource(cuda_huge_page_resource&&) = default;
cuda_huge_page_resource& operator=(cuda_huge_page_resource const&) = default;
cuda_huge_page_resource& operator=(cuda_huge_page_resource&&) = default;

/**
* @brief Query whether the resource supports use of non-null CUDA streams for
* allocation/deallocation. `cuda_huge_page_resource` does not support streams.
*
* @returns bool false
*/
[[nodiscard]] bool supports_streams() const noexcept override { return false; }

/**
* @brief Query whether the resource supports the get_mem_info API.
*
* @return true
*/
[[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; }

private:
/**
* @brief Allocates memory of size at least `bytes` using mmap.
*
* The memory is requested as an anonymous private mapping, the kernel is advised
* (via MADV_HUGEPAGE) to back it with transparent huge pages, and the allocation
* is zero-initialized. The returned pointer is page-aligned.
*
* @note Stream argument is ignored
*
* @throws `raft::exception` if the mmap or madvise call fails
*
* @param bytes The size, in bytes, of the allocation
* @return void* Pointer to the newly allocated memory
*/
void* do_allocate(std::size_t bytes, rmm::cuda_stream_view) override
{
void* _addr{nullptr};
_addr = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (_addr == MAP_FAILED) { RAFT_FAIL("huge_page_resource::MAP FAILED"); }
if (madvise(_addr, bytes, MADV_HUGEPAGE) == -1) {
munmap(_addr, bytes);
RAFT_FAIL("huge_page_resource::madvise MADV_HUGEPAGE");
}
memset(_addr, 0, bytes);
return _addr;
}

/**
* @brief Deallocate memory pointed to by \p ptr by unmapping it.
*
* @note Stream argument is ignored.
*
* @throws `raft::exception` if the munmap call fails.
*
* @param ptr Pointer to be deallocated
* @param size Size, in bytes, of the original allocation
*/
void do_deallocate(void* ptr, std::size_t size, rmm::cuda_stream_view) override
{
if (munmap(ptr, size) == -1) { RAFT_FAIL("huge_page_resource::munmap"); }
}

/**
* @brief Compare this resource to another.
*
* Two cuda_huge_page_resources always compare equal, because they can each
* deallocate memory allocated by the other.
*
* @throws Nothing.
*
* @param other The other resource to compare to
* @return true If the two resources are equivalent
* @return false If the two resources are not equal
*/
[[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
{
return dynamic_cast<cuda_huge_page_resource const*>(&other) != nullptr;
}

/**
* @brief Get the free and total device memory for this resource.
*
* @throws `rmm::cuda_error` if unable to retrieve memory info.
*
* @return std::pair containing free_size and total_size of memory
*/
[[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
rmm::cuda_stream_view) const override
{
std::size_t free_size{};
std::size_t total_size{};
RMM_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size));
return std::make_pair(free_size, total_size);
}
};
} // namespace raft::mr
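A brief usage sketch, not part of this diff: the resource is exercised through the standard rmm::mr::device_memory_resource interface. The include path and buffer size are illustrative assumptions.

#include <rmm/cuda_stream_view.hpp>

#include "cuda_huge_page_resource.hpp"  // hypothetical include path

#include <cstddef>

int main()
{
  raft::mr::cuda_huge_page_resource mr;
  std::size_t bytes = std::size_t{1} << 30;  // 1 GiB, zero-initialized by do_allocate
  // The stream argument is accepted but ignored by this resource.
  void* p = mr.allocate(bytes, rmm::cuda_stream_view{});
  // ... use p on the host, or on the device on coherent (GH) systems ...
  mr.deallocate(p, bytes, rmm::cuda_stream_view{});
  return 0;
}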
130 changes: 130 additions & 0 deletions cpp/bench/ann/src/common/cuda_pinned_resource.hpp
@@ -0,0 +1,130 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <rmm/mr/device/device_memory_resource.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/error.hpp>

#include <cstddef>

namespace raft::mr {
/**
* @brief `device_memory_resource` derived class that uses cudaMallocHost/Free for
* allocation/deallocation.
*
* This is almost the same as rmm::mr::host::pinned_memory_resource, but it has
* device_memory_resource as its base class. Pinned memory can be accessed from the
* device, and using this allocator we can create a device_mdarray backed by pinned memory.
*
* TODO(tfeher): it would be preferable to just rely on the existing allocator from rmm
* (pinned_memory_resource), but that is incompatible with the container_policy class
* for device matrix, because the latter expects a device_memory_resource. We shall
* revise this once we progress with Issue https://github.com/rapidsai/raft/issues/1819
*/
class cuda_pinned_resource final : public rmm::mr::device_memory_resource {
public:
cuda_pinned_resource() = default;
~cuda_pinned_resource() override = default;
cuda_pinned_resource(cuda_pinned_resource const&) = default;
cuda_pinned_resource(cuda_pinned_resource&&) = default;
cuda_pinned_resource& operator=(cuda_pinned_resource const&) = default;
cuda_pinned_resource& operator=(cuda_pinned_resource&&) = default;

/**
* @brief Query whether the resource supports use of non-null CUDA streams for
* allocation/deallocation. `cuda_pinned_resource` does not support streams.
*
* @returns bool false
*/
[[nodiscard]] bool supports_streams() const noexcept override { return false; }

/**
* @brief Query whether the resource supports the get_mem_info API.
*
* @return true
*/
[[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; }

private:
/**
* @brief Allocates pinned host memory of size at least `bytes` using cudaMallocHost.
*
* The returned pointer has at least 256B alignment.
*
* @note Stream argument is ignored
*
* @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled
*
* @param bytes The size, in bytes, of the allocation
* @return void* Pointer to the newly allocated memory
*/
void* do_allocate(std::size_t bytes, rmm::cuda_stream_view) override
{
void* ptr{nullptr};
RMM_CUDA_TRY_ALLOC(cudaMallocHost(&ptr, bytes));
return ptr;
}

/**
* @brief Deallocate memory pointed to by \p ptr using cudaFreeHost.
*
* @note Stream argument is ignored.
*
* @throws Nothing.
*
* @param ptr Pointer to be deallocated
*/
void do_deallocate(void* ptr, std::size_t, rmm::cuda_stream_view) override
{
RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr));
}

/**
* @brief Compare this resource to another.
*
* Two cuda_pinned_resources always compare equal, because they can each
* deallocate memory allocated by the other.
*
* @throws Nothing.
*
* @param other The other resource to compare to
* @return true If the two resources are equivalent
* @return false If the two resources are not equal
*/
[[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
{
return dynamic_cast<cuda_pinned_resource const*>(&other) != nullptr;
}

/**
* @brief Get the free and total device memory for this resource.
*
* @throws `rmm::cuda_error` if unable to retrieve memory info.
*
* @return std::pair containing free_size and total_size of memory
*/
[[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
rmm::cuda_stream_view) const override
{
std::size_t free_size{};
std::size_t total_size{};
RMM_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size));
return std::make_pair(free_size, total_size);
}
};
} // namespace raft::mr
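As with the huge-page resource, a short usage sketch (not part of the diff; the include path is an assumption). Pinned memory obtained through this resource is host memory the device can also access, so the allocate/deallocate round-trip looks identical to any other device_memory_resource:

#include <rmm/cuda_stream_view.hpp>

#include "cuda_pinned_resource.hpp"  // hypothetical include path

#include <cstddef>
#include <cstring>

int main()
{
  raft::mr::cuda_pinned_resource mr;
  std::size_t bytes = 1 << 20;  // 1 MiB of pinned (page-locked) host memory
  void* p = mr.allocate(bytes, rmm::cuda_stream_view{});
  std::memset(p, 0, bytes);  // pinned memory is directly writable from the host
  mr.deallocate(p, bytes, rmm::cuda_stream_view{});
  return 0;
}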
21 changes: 21 additions & 0 deletions cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h
@@ -206,6 +206,21 @@ void parse_build_param(const nlohmann::json& conf,
   }
 }
 
+raft::bench::ann::AllocatorType parse_allocator(std::string mem_type)
+{
+  if (mem_type == "device") {
+    return raft::bench::ann::AllocatorType::Device;
+  } else if (mem_type == "host_pinned") {
+    return raft::bench::ann::AllocatorType::HostPinned;
+  } else if (mem_type == "host_huge_page") {
+    return raft::bench::ann::AllocatorType::HostHugePage;
+  }
+  THROW(
+    "Invalid value for memory type %s, must be one of [\"device\", \"host_pinned\", "
+    "\"host_huge_page\"]",
+    mem_type.c_str());
+}
+
 template <typename T, typename IdxT>
 void parse_search_param(const nlohmann::json& conf,
                         typename raft::bench::ann::RaftCagra<T, IdxT>::SearchParam& param)
@@ -227,5 +242,11 @@ void parse_search_param(const nlohmann::json& conf,
       THROW("Invalid value for algo: %s", tmp.c_str());
     }
   }
+  if (conf.contains("graph_memory_type")) {
+    param.graph_mem = parse_allocator(conf.at("graph_memory_type"));
+  }
+  if (conf.contains("internal_dataset_memory_type")) {
+    param.dataset_mem = parse_allocator(conf.at("internal_dataset_memory_type"));
+  }
 }
 #endif
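For reference, a minimal sketch of how the new keys might appear in a benchmark config and flow through parse_allocator. The key names come from the diff above; the surrounding JSON shape is an assumption, exercised here via nlohmann::json:

#include <nlohmann/json.hpp>

#include <iostream>
#include <string>

int main()
{
  // Hypothetical fragment of a search-param entry in <conf>.json; only the two
  // keys added by this PR are shown.
  auto conf = nlohmann::json::parse(R"({
    "graph_memory_type": "host_huge_page",
    "internal_dataset_memory_type": "host_pinned"
  })");
  // parse_search_param() forwards these strings to parse_allocator(), which
  // accepts exactly "device", "host_pinned", or "host_huge_page" and throws
  // on anything else.
  std::cout << conf.at("graph_memory_type").get<std::string>() << '\n';
  std::cout << conf.at("internal_dataset_memory_type").get<std::string>() << '\n';
  return 0;
}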