Skip to content

Commit

Permalink
Merge branch 'branch-24.02' into fea-mdbuffer
Browse files Browse the repository at this point in the history
  • Loading branch information
wphicks authored Nov 16, 2023
2 parents b84c290 + 7754040 commit 1f0ad4f
Show file tree
Hide file tree
Showing 14 changed files with 525 additions and 43 deletions.
1 change: 0 additions & 1 deletion ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ for FILE in python/*/pyproject.toml; do
for DEP in "${DEPENDENCIES[@]}"; do
sed_runner "/\"${DEP}==/ s/==.*\"/==${NEXT_SHORT_TAG_PEP440}.*\"/g" ${FILE}
done
sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" "${FILE}"
sed_runner "/\"ucx-py==/ s/==.*\"/==${NEXT_UCX_PY_SHORT_TAG_PEP440}.*\"/g" ${FILE}
done

Expand Down
20 changes: 15 additions & 5 deletions cpp/bench/ann/src/common/benchmark.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "util.hpp"

#include <benchmark/benchmark.h>
#include <raft/core/logger.hpp>

#include <algorithm>
#include <atomic>
Expand Down Expand Up @@ -131,7 +132,7 @@ void bench_build(::benchmark::State& state,
log_info("Overwriting file: %s", index.file.c_str());
} else {
return state.SkipWithMessage(
"Index file already exists (use --overwrite to overwrite the index).");
"Index file already exists (use --force to overwrite the index).");
}
}

Expand Down Expand Up @@ -380,7 +381,7 @@ inline void printf_usage()
::benchmark::PrintDefaultHelp();
fprintf(stdout,
" [--build|--search] \n"
" [--overwrite]\n"
" [--force]\n"
" [--data_prefix=<prefix>]\n"
" [--index_prefix=<prefix>]\n"
" [--override_kv=<key:value1:value2:...:valueN>]\n"
Expand All @@ -392,7 +393,7 @@ inline void printf_usage()
" --build: build mode, will build index\n"
" --search: search mode, will search using the built index\n"
" one and only one of --build and --search should be specified\n"
" --overwrite: force overwriting existing index files\n"
" --force: force overwriting existing index files\n"
" --data_prefix=<prefix>:"
" prepend <prefix> to dataset file paths specified in the <conf>.json (default = "
"'data/').\n"
Expand Down Expand Up @@ -572,6 +573,8 @@ inline auto run_main(int argc, char** argv) -> int
std::string mode = "latency";
std::string threads_arg_txt = "";
std::vector<int> threads = {1, -1}; // min_thread, max_thread
std::string log_level_str = "";
int raft_log_level = raft::logger::get(RAFT_NAME).get_level();
kv_series override_kv{};

char arg0_default[] = "benchmark"; // NOLINT
Expand All @@ -589,14 +592,19 @@ inline auto run_main(int argc, char** argv) -> int
std::ifstream conf_stream(conf_path);

for (int i = 1; i < argc; i++) {
if (parse_bool_flag(argv[i], "--overwrite", force_overwrite) ||
if (parse_bool_flag(argv[i], "--force", force_overwrite) ||
parse_bool_flag(argv[i], "--build", build_mode) ||
parse_bool_flag(argv[i], "--search", search_mode) ||
parse_string_flag(argv[i], "--data_prefix", data_prefix) ||
parse_string_flag(argv[i], "--index_prefix", index_prefix) ||
parse_string_flag(argv[i], "--mode", mode) ||
parse_string_flag(argv[i], "--override_kv", new_override_kv) ||
parse_string_flag(argv[i], "--threads", threads_arg_txt)) {
parse_string_flag(argv[i], "--threads", threads_arg_txt) ||
parse_string_flag(argv[i], "--raft_log_level", log_level_str)) {
if (!log_level_str.empty()) {
raft_log_level = std::stoi(log_level_str);
log_level_str = "";
}
if (!threads_arg_txt.empty()) {
auto threads_arg = split(threads_arg_txt, ':');
threads[0] = std::stoi(threads_arg[0]);
Expand Down Expand Up @@ -625,6 +633,8 @@ inline auto run_main(int argc, char** argv) -> int
}
}

raft::logger::get(RAFT_NAME).set_level(raft_log_level);

Objective metric_objective = Objective::LATENCY;
if (mode == "throughput") { metric_objective = Objective::THROUGHPUT; }

Expand Down
132 changes: 132 additions & 0 deletions cpp/bench/ann/src/common/cuda_huge_page_resource.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>
#include <sys/mman.h>

#include <cstddef>

namespace raft::mr {
/**
* @brief `device_memory_resource` derived class that uses mmap to allocate memory.
* This class enables memory allocation using huge pages.
* It is assumed that the allocated memory is directly accessible on device. This currently only
* works on GH systems.
*
* TODO(tfeher): consider improving or removing this helper once we made progress with
* https://github.com/rapidsai/raft/issues/1819
*/
class cuda_huge_page_resource final : public rmm::mr::device_memory_resource {
public:
cuda_huge_page_resource() = default;
~cuda_huge_page_resource() override = default;
cuda_huge_page_resource(cuda_huge_page_resource const&) = default;
cuda_huge_page_resource(cuda_huge_page_resource&&) = default;
cuda_huge_page_resource& operator=(cuda_huge_page_resource const&) = default;
cuda_huge_page_resource& operator=(cuda_huge_page_resource&&) = default;

/**
* @brief Query whether the resource supports use of non-null CUDA streams for
* allocation/deallocation. `cuda_huge_page_resource` does not support streams.
*
* @returns bool false
*/
[[nodiscard]] bool supports_streams() const noexcept override { return false; }

/**
* @brief Query whether the resource supports the get_mem_info API.
*
* @return true
*/
[[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; }

private:
/**
* @brief Allocates memory of size at least `bytes` using cudaMalloc.
*
* The returned pointer has at least 256B alignment.
*
* @note Stream argument is ignored
*
* @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled
*
* @param bytes The size, in bytes, of the allocation
* @return void* Pointer to the newly allocated memory
*/
void* do_allocate(std::size_t bytes, rmm::cuda_stream_view) override
{
void* _addr{nullptr};
_addr = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (_addr == MAP_FAILED) { RAFT_FAIL("huge_page_resource::MAP FAILED"); }
if (madvise(_addr, bytes, MADV_HUGEPAGE) == -1) {
munmap(_addr, bytes);
RAFT_FAIL("huge_page_resource::madvise MADV_HUGEPAGE");
}
memset(_addr, 0, bytes);
return _addr;
}

/**
* @brief Deallocate memory pointed to by \p p.
*
* @note Stream argument is ignored.
*
* @throws Nothing.
*
* @param p Pointer to be deallocated
*/
void do_deallocate(void* ptr, std::size_t size, rmm::cuda_stream_view) override
{
if (munmap(ptr, size) == -1) { RAFT_FAIL("huge_page_resource::munmap"); }
}

/**
* @brief Compare this resource to another.
*
* Two cuda_huge_page_resources always compare equal, because they can each
* deallocate memory allocated by the other.
*
* @throws Nothing.
*
* @param other The other resource to compare to
* @return true If the two resources are equivalent
* @return false If the two resources are not equal
*/
[[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
{
return dynamic_cast<cuda_huge_page_resource const*>(&other) != nullptr;
}

/**
* @brief Get free and available memory for memory resource
*
* @throws `rmm::cuda_error` if unable to retrieve memory info.
*
* @return std::pair contaiing free_size and total_size of memory
*/
[[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
rmm::cuda_stream_view) const override
{
std::size_t free_size{};
std::size_t total_size{};
RMM_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size));
return std::make_pair(free_size, total_size);
}
};
} // namespace raft::mr
130 changes: 130 additions & 0 deletions cpp/bench/ann/src/common/cuda_pinned_resource.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <rmm/mr/device/device_memory_resource.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/error.hpp>

#include <cstddef>

namespace raft::mr {
/**
* @brief `device_memory_resource` derived class that uses cudaMallocHost/Free for
* allocation/deallocation.
*
* This is almost the same as rmm::mr::host::pinned_memory_resource, but it has
* device_memory_resource as base class. Pinned memory can be accessed from device,
* and using this allocator we can create device_mdarray backed by pinned allocator.
*
* TODO(tfeher): it would be preferred to just rely on the existing allocator from rmm
* (pinned_memory_resource), but that is incompatible with the container_policy class
* for device matrix, because the latter expects a device_memory_resource. We shall
* revise this once we progress with Issue https://github.com/rapidsai/raft/issues/1819
*/
class cuda_pinned_resource final : public rmm::mr::device_memory_resource {
public:
cuda_pinned_resource() = default;
~cuda_pinned_resource() override = default;
cuda_pinned_resource(cuda_pinned_resource const&) = default;
cuda_pinned_resource(cuda_pinned_resource&&) = default;
cuda_pinned_resource& operator=(cuda_pinned_resource const&) = default;
cuda_pinned_resource& operator=(cuda_pinned_resource&&) = default;

/**
* @brief Query whether the resource supports use of non-null CUDA streams for
* allocation/deallocation. `cuda_pinned_resource` does not support streams.
*
* @returns bool false
*/
[[nodiscard]] bool supports_streams() const noexcept override { return false; }

/**
* @brief Query whether the resource supports the get_mem_info API.
*
* @return true
*/
[[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; }

private:
/**
* @brief Allocates memory of size at least `bytes` using cudaMalloc.
*
* The returned pointer has at least 256B alignment.
*
* @note Stream argument is ignored
*
* @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled
*
* @param bytes The size, in bytes, of the allocation
* @return void* Pointer to the newly allocated memory
*/
void* do_allocate(std::size_t bytes, rmm::cuda_stream_view) override
{
void* ptr{nullptr};
RMM_CUDA_TRY_ALLOC(cudaMallocHost(&ptr, bytes));
return ptr;
}

/**
* @brief Deallocate memory pointed to by \p p.
*
* @note Stream argument is ignored.
*
* @throws Nothing.
*
* @param p Pointer to be deallocated
*/
void do_deallocate(void* ptr, std::size_t, rmm::cuda_stream_view) override
{
RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr));
}

/**
* @brief Compare this resource to another.
*
* Two cuda_pinned_resources always compare equal, because they can each
* deallocate memory allocated by the other.
*
* @throws Nothing.
*
* @param other The other resource to compare to
* @return true If the two resources are equivalent
* @return false If the two resources are not equal
*/
[[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
{
return dynamic_cast<cuda_pinned_resource const*>(&other) != nullptr;
}

/**
* @brief Get free and available memory for memory resource
*
* @throws `rmm::cuda_error` if unable to retrieve memory info.
*
* @return std::pair contaiing free_size and total_size of memory
*/
[[nodiscard]] std::pair<std::size_t, std::size_t> do_get_mem_info(
rmm::cuda_stream_view) const override
{
std::size_t free_size{};
std::size_t total_size{};
RMM_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size));
return std::make_pair(free_size, total_size);
}
};
} // namespace raft::mr
21 changes: 21 additions & 0 deletions cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,21 @@ void parse_build_param(const nlohmann::json& conf,
}
}

raft::bench::ann::AllocatorType parse_allocator(std::string mem_type)
{
if (mem_type == "device") {
return raft::bench::ann::AllocatorType::Device;
} else if (mem_type == "host_pinned") {
return raft::bench::ann::AllocatorType::HostPinned;
} else if (mem_type == "host_huge_page") {
return raft::bench::ann::AllocatorType::HostHugePage;
}
THROW(
"Invalid value for memory type %s, must be one of [\"device\", \"host_pinned\", "
"\"host_huge_page\"",
mem_type.c_str());
}

template <typename T, typename IdxT>
void parse_search_param(const nlohmann::json& conf,
typename raft::bench::ann::RaftCagra<T, IdxT>::SearchParam& param)
Expand All @@ -227,5 +242,11 @@ void parse_search_param(const nlohmann::json& conf,
THROW("Invalid value for algo: %s", tmp.c_str());
}
}
if (conf.contains("graph_memory_type")) {
param.graph_mem = parse_allocator(conf.at("graph_memory_type"));
}
if (conf.contains("internal_dataset_memory_type")) {
param.dataset_mem = parse_allocator(conf.at("internal_dataset_memory_type"));
}
}
#endif
Loading

0 comments on commit 1f0ad4f

Please sign in to comment.