-
Notifications
You must be signed in to change notification settings - Fork 75
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Diskann Benchmarking Wrapper #260
base: branch-25.02
Are you sure you want to change the base?
Changes from 107 commits
8e8d3c1
e937ebd
0bbbf0d
02084e2
64f1d60
706f22e
3ea499b
e0aab8f
17c5510
f426df9
a7bdd33
d2442ca
dbc84cc
7e37218
b2aef6d
b9762d5
bf75242
a8bcdef
3818da9
cd8bfe5
ec6d70c
c9f797a
585ad53
441ab2a
33b075d
81c92e6
11545c3
ffea663
9c1cddc
96d5642
a7eb787
4cbe7b1
63621f4
a890ac5
450dcee
b0f4b57
8cd6c40
d658856
d1e4101
2e080c6
8c6a178
626dc17
3d15882
a72165c
9c202f2
58a729c
2412b70
3a56402
0b39d5b
92ec474
333539f
a13bf1a
df54939
93b2620
54385ab
c35d899
31d846a
61e00c7
0bf43e9
0a6b094
645d84b
d6897cc
396a589
d325698
03a1e09
f061f27
f95aec7
b667786
4aa513f
17f723e
6532914
1f168a8
2a5d1fb
173df8f
46e7728
1b03cf7
c131c52
3d40d2d
c3a25fc
6bebeb8
e254c9b
82e21e8
de2bf84
6d3b32d
2506ef1
16f35b3
18f26a7
663dfe0
4bb2b39
fd43711
30bcc6e
a10d834
4b396d7
c443944
ed8c9b6
f015264
6d6167d
9c2185b
2b758d3
b7ba35b
63e02ff
48a6a9d
fd429d2
7acfe51
c7765d9
62b207f
66001c8
0d63fb0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -135,6 +135,12 @@ void bench_build(::benchmark::State& state, | |
} | ||
} | ||
|
||
if (index.algo == "diskann_ssd") { | ||
make_sure_parent_dir_exists(index.file); | ||
index.build_param["dataset_file"] = dataset->base_filename(); | ||
index.build_param["path_to_index"] = index.file; | ||
} | ||
|
||
std::unique_ptr<algo<T>> algo; | ||
try { | ||
algo = create_algo<T>(index.algo, dataset->distance(), dataset->dim(), index.build_param); | ||
|
@@ -144,7 +150,8 @@ void bench_build(::benchmark::State& state, | |
|
||
const auto algo_property = parse_algo_property(algo->get_preference(), index.build_param); | ||
|
||
const T* base_set = dataset->base_set(algo_property.dataset_memory_type); | ||
const T* base_set = nullptr; | ||
if (index.algo != "diskann_ssd") base_set = dataset->base_set(algo_property.dataset_memory_type); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @achirkin if we do not have this line, the entire dataset will be read into the … |
||
std::size_t index_size = dataset->base_set_size(); | ||
|
||
cuda_timer gpu_timer{algo}; | ||
|
@@ -223,7 +230,12 @@ void bench_search(::benchmark::State& state, | |
|
||
const T* query_set = nullptr; | ||
|
||
if (!file_exists(index.file)) { | ||
std::string filename; | ||
if (index.algo != "diskann_ssd") | ||
filename = index.file; | ||
else | ||
filename = index.file + "_disk.index"; | ||
if (!file_exists(filename)) { | ||
state.SkipWithError("Index file is missing. Run the benchmark in the build mode first."); | ||
return; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
/* | ||
* Copyright (c) 2024, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#include "../common/ann_types.hpp" | ||
#include "cuvs_vamana_wrapper.h" | ||
|
||
#include <rmm/cuda_device.hpp> | ||
#include <rmm/mr/device/pool_memory_resource.hpp> | ||
#include <rmm/resource_ref.hpp> | ||
|
||
namespace cuvs::bench { | ||
|
||
template <typename T, typename IdxT> | ||
void parse_build_param(const nlohmann::json& conf, | ||
typename cuvs::bench::cuvs_vamana<T, IdxT>::build_param& param) | ||
{ | ||
if (conf.contains("graph_degree")) { param.graph_degree = conf.at("graph_degree"); } | ||
if (conf.contains("visited_size")) { param.visited_size = conf.at("visited_size"); } | ||
if (conf.contains("alpha")) { param.alpha = conf.at("alpha"); } | ||
} | ||
|
||
template <typename T, typename IdxT> | ||
void parse_search_param(const nlohmann::json& conf, | ||
typename cuvs::bench::cuvs_vamana<T, IdxT>::search_param& param) | ||
{ | ||
if (conf.contains("L_search")) { param.L_search = conf.at("L_search"); } | ||
if (conf.contains("num_threads")) { param.num_threads = conf.at("num_threads"); } | ||
} | ||
|
||
template <typename T> | ||
auto create_algo(const std::string& algo_name, | ||
const std::string& distance, | ||
int dim, | ||
const nlohmann::json& conf) -> std::unique_ptr<cuvs::bench::algo<T>> | ||
{ | ||
[[maybe_unused]] cuvs::bench::Metric metric = parse_metric(distance); | ||
std::unique_ptr<cuvs::bench::algo<T>> a; | ||
|
||
if constexpr (std::is_same_v<T, float> or std::is_same_v<T, std::uint8_t>) { | ||
if (algo_name == "cuvs_vamana") { | ||
typename cuvs::bench::cuvs_vamana<T, uint32_t>::build_param param; | ||
parse_build_param<T, uint32_t>(conf, param); | ||
a = std::make_unique<cuvs::bench::cuvs_vamana<T, uint32_t>>(metric, dim, param); | ||
} | ||
} | ||
|
||
if (!a) { throw std::runtime_error("invalid algo: '" + algo_name + "'"); } | ||
|
||
return a; | ||
} | ||
|
||
template <typename T> | ||
auto create_search_param(const std::string& algo_name, const nlohmann::json& conf) | ||
-> std::unique_ptr<typename cuvs::bench::algo<T>::search_param> | ||
{ | ||
if (algo_name == "cuvs_vamana") { | ||
auto param = std::make_unique<typename cuvs::bench::cuvs_vamana<T, uint32_t>::search_param>(); | ||
parse_search_param<T, uint32_t>(conf, *param); | ||
return param; | ||
} | ||
|
||
throw std::runtime_error("invalid algo: '" + algo_name + "'"); | ||
} | ||
|
||
} // namespace cuvs::bench | ||
|
||
// NOTE(review): REGISTER_ALGO_INSTANCE is defined outside this file; presumably it instantiates
// the factory templates above for `float` — confirm against the macro's definition.
REGISTER_ALGO_INSTANCE(float); | ||
|
||
#ifdef ANN_BENCH_BUILD_MAIN | ||
#include "../common/benchmark.hpp" | ||
/* | ||
[NOTE] Dear developer, | ||
|
||
Please don't modify the content of the `main` function; this will make the behavior of the benchmark | ||
executable differ depending on the cmake flags and will complicate the debugging. In particular, | ||
don't try to setup an RMM memory resource here; it will anyway be modified by the memory resource | ||
set on per-algorithm basis. For example, see `cuvs/cuvs_ann_bench_utils.h`. | ||
*/ | ||
// Entry point of the benchmark executable: hands the command line to the shared runner.
int main(int argc, char** argv)
{
  return cuvs::bench::run_main(argc, argv);
}
#endif |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does MSFT DiskANN repo not support ARM?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, they have `mkl-devel` as a dependency, which is not meant to be installed on aarch64.