From d1b53b1b6218bd54df2bbe0eae606fc0e381d90c Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Sat, 22 Jul 2023 00:09:13 +0200 Subject: [PATCH 01/57] Separate cagra index type from internal idx type --- cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 16 ++--- cpp/bench/prims/neighbors/cagra_bench.cuh | 20 +++---- cpp/include/raft/neighbors/cagra.cuh | 46 +++++++-------- cpp/include/raft/neighbors/cagra_types.hpp | 58 +++++++++---------- .../neighbors/detail/cagra/cagra_build.cuh | 11 ++-- .../neighbors/detail/cagra/cagra_search.cuh | 29 +++++----- .../detail/cagra/cagra_serialize.cuh | 6 +- .../neighbors/detail/cagra/graph_core.cuh | 34 ++++++----- .../detail/cagra/search_multi_cta.cuh | 4 +- .../cagra/search_multi_cta_kernel-ext.cuh | 8 +-- .../cagra/search_multi_cta_kernel-inl.cuh | 4 +- .../detail/cagra/search_multi_kernel.cuh | 4 +- .../neighbors/detail/cagra/search_plan.cuh | 4 +- .../detail/cagra/search_single_cta.cuh | 4 +- .../cagra/search_single_cta_kernel-ext.cuh | 8 +-- .../cagra/search_single_cta_kernel-inl.cuh | 4 +- .../cagra/search_multi_cta_00_generate.py | 11 ++-- ...arch_multi_cta_float_uint32_dim1024_t32.cu | 4 +- ...search_multi_cta_float_uint32_dim128_t8.cu | 4 +- ...earch_multi_cta_float_uint32_dim256_t16.cu | 4 +- ...earch_multi_cta_float_uint32_dim512_t32.cu | 4 +- ...arch_multi_cta_float_uint64_dim1024_t32.cu | 4 +- ...search_multi_cta_float_uint64_dim128_t8.cu | 4 +- ...earch_multi_cta_float_uint64_dim256_t16.cu | 4 +- ...earch_multi_cta_float_uint64_dim512_t32.cu | 4 +- ...earch_multi_cta_int8_uint32_dim1024_t32.cu | 4 +- .../search_multi_cta_int8_uint32_dim128_t8.cu | 4 +- ...search_multi_cta_int8_uint32_dim256_t16.cu | 4 +- ...search_multi_cta_int8_uint32_dim512_t32.cu | 4 +- ...arch_multi_cta_uint8_uint32_dim1024_t32.cu | 4 +- ...search_multi_cta_uint8_uint32_dim128_t8.cu | 4 +- ...earch_multi_cta_uint8_uint32_dim256_t16.cu | 4 +- ...earch_multi_cta_uint8_uint32_dim512_t32.cu | 4 +- .../cagra/search_single_cta_00_generate.py | 4 +- 
...rch_single_cta_float_uint32_dim1024_t32.cu | 4 +- ...earch_single_cta_float_uint32_dim128_t8.cu | 4 +- ...arch_single_cta_float_uint32_dim256_t16.cu | 4 +- ...arch_single_cta_float_uint32_dim512_t32.cu | 4 +- ...rch_single_cta_float_uint64_dim1024_t32.cu | 4 +- ...earch_single_cta_float_uint64_dim128_t8.cu | 4 +- ...arch_single_cta_float_uint64_dim256_t16.cu | 4 +- ...arch_single_cta_float_uint64_dim512_t32.cu | 4 +- ...arch_single_cta_int8_uint32_dim1024_t32.cu | 4 +- ...search_single_cta_int8_uint32_dim128_t8.cu | 4 +- ...earch_single_cta_int8_uint32_dim256_t16.cu | 4 +- ...earch_single_cta_int8_uint32_dim512_t32.cu | 4 +- ...rch_single_cta_uint8_uint32_dim1024_t32.cu | 4 +- ...earch_single_cta_uint8_uint32_dim128_t8.cu | 4 +- ...arch_single_cta_uint8_uint32_dim256_t16.cu | 4 +- ...arch_single_cta_uint8_uint32_dim512_t32.cu | 4 +- cpp/test/neighbors/ann_cagra.cuh | 30 +++++----- .../ann_cagra/search_kernel_uint64_t.cuh | 8 +-- 52 files changed, 224 insertions(+), 217 deletions(-) diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index 79ae746078..e6490b3f07 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -71,7 +71,7 @@ class RaftCagra : public ANN { AlgoProperty get_property() const override { AlgoProperty property; - property.dataset_memory_type = MemoryType::Host; + property.dataset_memory_type = MemoryType::HostMmap; property.query_memory_type = MemoryType::Device; property.need_dataset_when_search = true; return property; @@ -104,13 +104,14 @@ RaftCagra::RaftCagra(Metric metric, int dim, const BuildParam& param) template void RaftCagra::build(const T* dataset, size_t nrow, cudaStream_t) { - if (get_property().dataset_memory_type == MemoryType::Host) { - auto dataset_view = raft::make_host_matrix_view(dataset, IdxT(nrow), dimension_); + if (get_property().dataset_memory_type != MemoryType::Device) { + auto dataset_view = + 
raft::make_host_matrix_view(dataset, IdxT(nrow), dimension_); index_.emplace( raft::neighbors::experimental::cagra::build(handle_, index_params_, dataset_view)); } else { auto dataset_view = - raft::make_device_matrix_view(dataset, IdxT(nrow), dimension_); + raft::make_device_matrix_view(dataset, IdxT(nrow), dimension_); index_.emplace( raft::neighbors::experimental::cagra::build(handle_, index_params_, dataset_view)); } @@ -152,9 +153,10 @@ void RaftCagra::search( neighbors_IdxT = neighbors_storage.data(); } - auto queries_view = raft::make_device_matrix_view(queries, batch_size, dimension_); - auto neighbors_view = raft::make_device_matrix_view(neighbors_IdxT, batch_size, k); - auto distances_view = raft::make_device_matrix_view(distances, batch_size, k); + auto queries_view = + raft::make_device_matrix_view(queries, batch_size, dimension_); + auto neighbors_view = raft::make_device_matrix_view(neighbors_IdxT, batch_size, k); + auto distances_view = raft::make_device_matrix_view(distances, batch_size, k); raft::neighbors::experimental::cagra::search( handle_, search_params_, *index_, queries_view, neighbors_view, distances_view); diff --git a/cpp/bench/prims/neighbors/cagra_bench.cuh b/cpp/bench/prims/neighbors/cagra_bench.cuh index c361dc82dc..19679377d8 100644 --- a/cpp/bench/prims/neighbors/cagra_bench.cuh +++ b/cpp/bench/prims/neighbors/cagra_bench.cuh @@ -47,9 +47,9 @@ struct CagraBench : public fixture { explicit CagraBench(const params& ps) : fixture(true), params_(ps), - queries_(make_device_matrix(handle, ps.n_queries, ps.n_dims)), - dataset_(make_device_matrix(handle, ps.n_samples, ps.n_dims)), - knn_graph_(make_device_matrix(handle, ps.n_samples, ps.degree)) + queries_(make_device_matrix(handle, ps.n_queries, ps.n_dims)), + dataset_(make_device_matrix(handle, ps.n_samples, ps.n_dims)), + knn_graph_(make_device_matrix(handle, ps.n_samples, ps.degree)) { // Generate random dataset and queriees raft::random::RngState state{42}; @@ -87,11 +87,11 @@ struct 
CagraBench : public fixture { search_params.thread_block_size = params_.block_size; search_params.num_parents = params_.num_parents; - auto indices = make_device_matrix(handle, params_.n_queries, params_.k); - auto distances = make_device_matrix(handle, params_.n_queries, params_.k); - auto ind_v = make_device_matrix_view( + auto indices = make_device_matrix(handle, params_.n_queries, params_.k); + auto distances = make_device_matrix(handle, params_.n_queries, params_.k); + auto ind_v = make_device_matrix_view( indices.data_handle(), params_.n_queries, params_.k); - auto dist_v = make_device_matrix_view( + auto dist_v = make_device_matrix_view( distances.data_handle(), params_.n_queries, params_.k); auto queries_v = make_const_mdspan(queries_.view()); @@ -125,9 +125,9 @@ struct CagraBench : public fixture { private: const params params_; std::optional> index_; - raft::device_matrix queries_; - raft::device_matrix dataset_; - raft::device_matrix knn_graph_; + raft::device_matrix queries_; + raft::device_matrix dataset_; + raft::device_matrix knn_graph_; }; inline const std::vector generate_inputs() diff --git a/cpp/include/raft/neighbors/cagra.cuh b/cpp/include/raft/neighbors/cagra.cuh index 5934f6ef69..c9f2127572 100644 --- a/cpp/include/raft/neighbors/cagra.cuh +++ b/cpp/include/raft/neighbors/cagra.cuh @@ -65,7 +65,7 @@ namespace raft::neighbors::experimental::cagra { * @endcode * * @tparam T data element type - * @tparam IdxT type of the indices in the source dataset + * @tparam IdxT type of the dataset vector indices * * @param[in] res raft resources * @param[in] dataset a matrix view (host or device) to a row-major matrix [n_rows, dim] @@ -76,19 +76,19 @@ namespace raft::neighbors::experimental::cagra { */ template void build_knn_graph(raft::resources const& res, - mdspan, row_major, accessor> dataset, - raft::host_matrix_view knn_graph, + mdspan, row_major, accessor> dataset, + raft::host_matrix_view knn_graph, std::optional refine_rate = std::nullopt, 
std::optional build_params = std::nullopt, std::optional search_params = std::nullopt) { using internal_IdxT = typename std::make_unsigned::type; - auto knn_graph_internal = make_host_matrix_view( + auto knn_graph_internal = make_host_matrix_view( reinterpret_cast(knn_graph.data_handle()), knn_graph.extent(0), knn_graph.extent(1)); - auto dataset_internal = mdspan, row_major, accessor>( + auto dataset_internal = mdspan, row_major, accessor>( dataset.data_handle(), dataset.extent(0), dataset.extent(1)); detail::build_knn_graph( @@ -119,7 +119,7 @@ void build_knn_graph(raft::resources const& res, * @endcode * * @tparam DataT type of the data in the source dataset - * @tparam IdxT type of the indices in the source dataset + * @tparam IdxT type of the dataset vector indices * * @param[in] res raft resources * @param[in] dataset a matrix view (host or device) to a row-major matrix [n_rows, dim] @@ -133,20 +133,20 @@ template , memory_type::host>> void sort_knn_graph(raft::resources const& res, - mdspan, row_major, d_accessor> dataset, - mdspan, row_major, g_accessor> knn_graph) + mdspan, row_major, d_accessor> dataset, + mdspan, row_major, g_accessor> knn_graph) { using internal_IdxT = typename std::make_unsigned::type; using g_accessor_internal = host_device_accessor, g_accessor::mem_type>; auto knn_graph_internal = - mdspan, row_major, g_accessor_internal>( + mdspan, row_major, g_accessor_internal>( reinterpret_cast(knn_graph.data_handle()), knn_graph.extent(0), knn_graph.extent(1)); - auto dataset_internal = mdspan, row_major, d_accessor>( + auto dataset_internal = mdspan, row_major, d_accessor>( dataset.data_handle(), dataset.extent(0), dataset.extent(1)); detail::graph::sort_knn_graph(res, dataset_internal, knn_graph_internal); @@ -170,12 +170,12 @@ template , memory_type::host>> void optimize(raft::resources const& res, - mdspan, row_major, g_accessor> knn_graph, - raft::host_matrix_view new_graph) + mdspan, row_major, g_accessor> knn_graph, + 
raft::host_matrix_view new_graph) { using internal_IdxT = typename std::make_unsigned::type; - auto new_graph_internal = raft::make_host_matrix_view( + auto new_graph_internal = raft::make_host_matrix_view( reinterpret_cast(new_graph.data_handle()), new_graph.extent(0), new_graph.extent(1)); @@ -183,7 +183,7 @@ void optimize(raft::resources const& res, using g_accessor_internal = host_device_accessor, memory_type::host>; auto knn_graph_internal = - mdspan, row_major, g_accessor_internal>( + mdspan, row_major, g_accessor_internal>( reinterpret_cast(knn_graph.data_handle()), knn_graph.extent(0), knn_graph.extent(1)); @@ -237,7 +237,7 @@ template , memory_type::host>> index build(raft::resources const& res, const index_params& params, - mdspan, row_major, Accessor> dataset) + mdspan, row_major, Accessor> dataset) { size_t intermediate_degree = params.intermediate_graph_degree; size_t graph_degree = params.graph_degree; @@ -256,11 +256,11 @@ index build(raft::resources const& res, graph_degree = intermediate_degree; } - auto knn_graph = raft::make_host_matrix(dataset.extent(0), intermediate_degree); + auto knn_graph = raft::make_host_matrix(dataset.extent(0), intermediate_degree); build_knn_graph(res, dataset, knn_graph.view()); - auto cagra_graph = raft::make_host_matrix(dataset.extent(0), graph_degree); + auto cagra_graph = raft::make_host_matrix(dataset.extent(0), graph_degree); optimize(res, knn_graph.view(), cagra_graph.view()); @@ -289,9 +289,9 @@ template void search(raft::resources const& res, const search_params& params, const index& idx, - raft::device_matrix_view queries, - raft::device_matrix_view neighbors, - raft::device_matrix_view distances) + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances) { RAFT_EXPECTS( queries.extent(0) == neighbors.extent(0) && queries.extent(0) == distances.extent(0), @@ -303,13 +303,13 @@ void search(raft::resources const& res, "Number of query dimensions should equal 
number of dimensions in the index."); using internal_IdxT = typename std::make_unsigned::type; - auto queries_internal = raft::make_device_matrix_view( + auto queries_internal = raft::make_device_matrix_view( queries.data_handle(), queries.extent(0), queries.extent(1)); - auto neighbors_internal = raft::make_device_matrix_view( + auto neighbors_internal = raft::make_device_matrix_view( reinterpret_cast(neighbors.data_handle()), neighbors.extent(0), neighbors.extent(1)); - auto distances_internal = raft::make_device_matrix_view( + auto distances_internal = raft::make_device_matrix_view( distances.data_handle(), distances.extent(0), distances.extent(1)); detail::search_main( diff --git a/cpp/include/raft/neighbors/cagra_types.hpp b/cpp/include/raft/neighbors/cagra_types.hpp index 44375c01f0..c046260cab 100644 --- a/cpp/include/raft/neighbors/cagra_types.hpp +++ b/cpp/include/raft/neighbors/cagra_types.hpp @@ -108,7 +108,7 @@ static_assert(std::is_aggregate_v); * The index stores the dataset and a kNN graph in device memory. * * @tparam T data element type - * @tparam IdxT type of the indices in the source dataset + * @tparam IdxT type of the vector indices (represent dataset.extent(0)) * */ template @@ -124,7 +124,7 @@ struct index : ann::index { return metric_; } - // /** Total length of the index. */ + // /** Total length of the index (number of vectors). 
*/ [[nodiscard]] constexpr inline auto size() const noexcept -> IdxT { return dataset_view_.extent(0); @@ -143,16 +143,14 @@ struct index : ann::index { /** Dataset [size, dim] */ [[nodiscard]] inline auto dataset() const noexcept - -> device_matrix_view + -> device_matrix_view { return dataset_view_; } /** neighborhood graph [size, graph-degree] */ - inline auto graph() noexcept -> device_matrix_view { return graph_view_; } - [[nodiscard]] inline auto graph() const noexcept - -> device_matrix_view + -> device_matrix_view { return graph_view_; } @@ -168,8 +166,8 @@ struct index : ann::index { index(raft::resources const& res) : ann::index(), metric_(raft::distance::DistanceType::L2Expanded), - dataset_(make_device_matrix(res, 0, 0)), - graph_(make_device_matrix(res, 0, 0)) + dataset_(make_device_matrix(res, 0, 0)), + graph_(make_device_matrix(res, 0, 0)) { } @@ -189,7 +187,7 @@ struct index : ann::index { * - Cagra index is normally created by the cagra::build * @code{.cpp} * using namespace raft::neighbors::experimental; - * auto dataset = raft::make_host_matrix(n_rows, n_cols); + * auto dataset = raft::make_host_matrix(n_rows, n_cols); * load_dataset(dataset.view()); * // use default index parameters * cagra::index_params index_params; @@ -198,8 +196,8 @@ struct index : ann::index { * // use default search parameters * cagra::search_params search_params; * // search K nearest neighbours - * auto neighbors = raft::make_device_matrix(res, n_queries, k); - * auto distances = raft::make_device_matrix(res, n_queries, k); + * auto neighbors = raft::make_device_matrix(res, n_queries, k); + * auto distances = raft::make_device_matrix(res, n_queries, k); * cagra::search(res, search_params, index, queries, neighbors, distances); * @endcode * In the above example, we have passed a host dataset to build. 
The returned index will own a @@ -210,8 +208,8 @@ struct index : ann::index { * @code{.cpp} * using namespace raft::neighbors::experimental; * - * auto dataset = raft::make_device_matrix(res, n_rows, n_cols); - * auto knn_graph = raft::make_device_matrix(res, n_rows, graph_degree); + * auto dataset = raft::make_device_matrix(res, n_rows, n_cols); + * auto knn_graph = raft::make_device_matrix(res, n_rows, graph_degree); * * // custom loading and graph creation * // load_dataset(dataset.view()); @@ -230,12 +228,12 @@ struct index : ann::index { template index(raft::resources const& res, raft::distance::DistanceType metric, - mdspan, row_major, data_accessor> dataset, - mdspan, row_major, graph_accessor> knn_graph) + mdspan, row_major, data_accessor> dataset, + mdspan, row_major, graph_accessor> knn_graph) : ann::index(), metric_(metric), - dataset_(make_device_matrix(res, 0, 0)), - graph_(make_device_matrix(res, 0, 0)) + dataset_(make_device_matrix(res, 0, 0)), + graph_(make_device_matrix(res, 0, 0)) { RAFT_EXPECTS(dataset.extent(0) == knn_graph.extent(0), "Dataset and knn_graph must have equal number of rows"); @@ -252,13 +250,13 @@ struct index : ann::index { * index. */ void update_dataset(raft::resources const& res, - raft::device_matrix_view dataset) + raft::device_matrix_view dataset) { if (dataset.extent(1) % AlignDim::Value != 0) { RAFT_LOG_DEBUG("Creating a padded copy of CAGRA dataset in device memory"); copy_padded(res, dataset); } else { - dataset_view_ = make_device_strided_matrix_view( + dataset_view_ = make_device_strided_matrix_view( dataset.data_handle(), dataset.extent(0), dataset.extent(1), dataset.extent(1)); } } @@ -269,7 +267,7 @@ struct index : ann::index { * We create a copy of the dataset on the device. The index manages the lifetime of this copy. 
*/ void update_dataset(raft::resources const& res, - raft::host_matrix_view dataset) + raft::host_matrix_view dataset) { RAFT_LOG_DEBUG("Copying CAGRA dataset from host to device"); copy_padded(res, dataset); @@ -282,7 +280,7 @@ struct index : ann::index { * the caller's responsibility to ensure that knn_graph stays alive as long as the index. */ void update_graph(raft::resources const& res, - raft::device_matrix_view knn_graph) + raft::device_matrix_view knn_graph) { graph_view_ = knn_graph; } @@ -293,10 +291,10 @@ struct index : ann::index { * We create a copy of the graph on the device. The index manages the lifetime of this copy. */ void update_graph(raft::resources const& res, - raft::host_matrix_view knn_graph) + raft::host_matrix_view knn_graph) { RAFT_LOG_DEBUG("Copying CAGRA knn graph from host to device"); - graph_ = make_device_matrix(res, knn_graph.extent(0), knn_graph.extent(1)); + graph_ = make_device_matrix(res, knn_graph.extent(0), knn_graph.extent(1)); raft::copy(graph_.data_handle(), knn_graph.data_handle(), knn_graph.size(), @@ -308,10 +306,10 @@ struct index : ann::index { /** Create a device copy of the dataset, and pad it if necessary. 
*/ template void copy_padded(raft::resources const& res, - mdspan, row_major, data_accessor> dataset) + mdspan, row_major, data_accessor> dataset) { dataset_ = - make_device_matrix(res, dataset.extent(0), AlignDim::roundUp(dataset.extent(1))); + make_device_matrix(res, dataset.extent(0), AlignDim::roundUp(dataset.extent(1))); if (dataset_.extent(1) == dataset.extent(1)) { raft::copy(dataset_.data_handle(), dataset.data_handle(), @@ -330,7 +328,7 @@ struct index : ann::index { cudaMemcpyDefault, resource::get_cuda_stream(res))); } - dataset_view_ = make_device_strided_matrix_view( + dataset_view_ = make_device_strided_matrix_view( dataset_.data_handle(), dataset_.extent(0), dataset.extent(1), dataset_.extent(1)); RAFT_LOG_DEBUG("CAGRA dataset strided matrix view %zux%zu, stride %zu", static_cast(dataset_view_.extent(0)), @@ -339,10 +337,10 @@ struct index : ann::index { } raft::distance::DistanceType metric_; - raft::device_matrix dataset_; - raft::device_matrix graph_; - raft::device_matrix_view dataset_view_; - raft::device_matrix_view graph_view_; + raft::device_matrix dataset_; + raft::device_matrix graph_; + raft::device_matrix_view dataset_view_; + raft::device_matrix_view graph_view_; }; /** @} */ diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh index d2bf7bf1ed..4f8323a481 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh @@ -40,8 +40,8 @@ namespace raft::neighbors::experimental::cagra::detail { template void build_knn_graph(raft::resources const& res, - mdspan, row_major, accessor> dataset, - raft::host_matrix_view knn_graph, + mdspan, row_major, accessor> dataset, + raft::host_matrix_view knn_graph, std::optional refine_rate = std::nullopt, std::optional build_params = std::nullopt, std::optional search_params = std::nullopt) @@ -108,7 +108,6 @@ void build_knn_graph(raft::resources const& res, 
max_batch_size, search_params->n_probes); - // TODO(tfeher): shall we use uint32_t? auto distances = raft::make_device_matrix(res, max_batch_size, gpu_top_k); auto neighbors = raft::make_device_matrix(res, max_batch_size, gpu_top_k); auto refined_distances = raft::make_device_matrix(res, max_batch_size, top_k); @@ -139,6 +138,8 @@ void build_knn_graph(raft::resources const& res, size_t d_report_offset = dataset.extent(0) / 100; // Report progress in 1% steps. for (const auto& batch : vec_batches) { + // Map int64_t to uint32_t because ivf_pq requires the latter. + // TODO(tfeher): remove this mapping once ivf_pq accepts mdspan with int64_t index type auto queries_view = raft::make_device_matrix_view( batch.data(), batch.size(), batch.row_width()); auto neighbors_view = make_device_matrix_view( @@ -147,7 +148,6 @@ void build_knn_graph(raft::resources const& res, distances.data_handle(), batch.size(), distances.extent(1)); ivf_pq::search(res, *search_params, index, queries_view, neighbors_view, distances_view); - if constexpr (is_host_mdspan_v) { raft::copy(neighbors_host.data_handle(), neighbors.data_handle(), @@ -167,7 +167,7 @@ void build_knn_graph(raft::resources const& res, refined_distances_host.data_handle(), batch.size(), top_k); resource::sync_stream(res); - raft::neighbors::detail::refine_host( // res, + raft::neighbors::detail::refine_host( dataset, queries_host_view, neighbors_host_view, @@ -234,6 +234,7 @@ void build_knn_graph(raft::resources const& res, } first = false; } + if (!first) RAFT_LOG_DEBUG("# Finished building kNN graph"); } diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh index 1561a3bb8d..e7688db1af 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_search.cuh @@ -38,7 +38,9 @@ namespace raft::neighbors::experimental::cagra::detail { * See the [build](#build) documentation for a usage 
example. * * @tparam T data element type - * @tparam IdxT type of the indices + * @tparam IdxT type of database vector indices + * @tparam internal_IdxT during search we map IdxT to internal_IdxT, this way we do not need + * separate kernels for int/uint. * * @param[in] handle * @param[in] params configure the search @@ -54,9 +56,9 @@ template & index, - raft::device_matrix_view queries, - raft::device_matrix_view neighbors, - raft::device_matrix_view distances) + raft::device_matrix_view queries, + raft::device_matrix_view neighbors, + raft::device_matrix_view distances) { RAFT_LOG_DEBUG("# dataset size = %lu, dim = %lu\n", static_cast(index.dataset().extent(0)), @@ -92,16 +94,15 @@ void search_main(raft::resources const& res, : nullptr; uint32_t* _num_executed_iterations = nullptr; - auto dataset_internal = make_device_strided_matrix_view( - index.dataset().data_handle(), - index.dataset().extent(0), - index.dataset().extent(1), - index.dataset().stride(0)); - auto graph_internal = - raft::make_device_matrix_view( - reinterpret_cast(index.graph().data_handle()), - index.graph().extent(0), - index.graph().extent(1)); + auto dataset_internal = + make_device_strided_matrix_view(index.dataset().data_handle(), + index.dataset().extent(0), + index.dataset().extent(1), + index.dataset().stride(0)); + auto graph_internal = raft::make_device_matrix_view( + reinterpret_cast(index.graph().data_handle()), + index.graph().extent(0), + index.graph().extent(1)); (*plan)(res, dataset_internal, diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh index 7f708506a5..1b95daf431 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh @@ -62,7 +62,7 @@ void serialize(raft::resources const& res, std::ostream& os, const index(dataset.extent(0), dataset.extent(1)); + auto host_dataset = make_host_matrix(dataset.extent(0), 
dataset.extent(1)); RAFT_CUDA_TRY(cudaMemcpy2DAsync(host_dataset.data_handle(), sizeof(T) * host_dataset.extent(1), dataset.data_handle(), @@ -111,8 +111,8 @@ auto deserialize(raft::resources const& res, std::istream& is) -> index auto graph_degree = deserialize_scalar(res, is); auto metric = deserialize_scalar(res, is); - auto dataset = raft::make_host_matrix(n_rows, dim); - auto graph = raft::make_host_matrix(n_rows, graph_degree); + auto dataset = raft::make_host_matrix(n_rows, dim); + auto graph = raft::make_host_matrix(n_rows, graph_degree); deserialize_mdspan(res, is, dataset.view()); deserialize_mdspan(res, is, graph.view()); diff --git a/cpp/include/raft/neighbors/detail/cagra/graph_core.cuh b/cpp/include/raft/neighbors/detail/cagra/graph_core.cuh index d915634df9..f66d85a6b8 100644 --- a/cpp/include/raft/neighbors/detail/cagra/graph_core.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/graph_core.cuh @@ -231,8 +231,8 @@ template , memory_type::host>> void sort_knn_graph(raft::resources const& res, - mdspan, row_major, d_accessor> dataset, - mdspan, row_major, g_accessor> knn_graph) + mdspan, row_major, d_accessor> dataset, + mdspan, row_major, g_accessor> knn_graph) { RAFT_EXPECTS(dataset.extent(0) == knn_graph.extent(0), "dataset size is expected to have the same number of graph index size"); @@ -252,7 +252,7 @@ void sort_knn_graph(raft::resources const& res, const double time_sort_start = cur_time(); RAFT_LOG_DEBUG("# Sorting kNN Graph on GPUs "); - auto d_dataset = raft::make_device_matrix(res, dataset_size, dataset_dim); + auto d_dataset = raft::make_device_matrix(res, dataset_size, dataset_dim); raft::copy(d_dataset.data_handle(), dataset_ptr, dataset_size * dataset_dim, @@ -318,8 +318,8 @@ template , memory_type::host>> void optimize(raft::resources const& res, - mdspan, row_major, g_accessor> knn_graph, - raft::host_matrix_view new_graph) + mdspan, row_major, g_accessor> knn_graph, + raft::host_matrix_view new_graph) { RAFT_LOG_DEBUG( "# Pruning 
kNN graph (size=%lu, degree=%lu)\n", knn_graph.extent(0), knn_graph.extent(1)); @@ -334,23 +334,24 @@ void optimize(raft::resources const& res, auto output_graph_ptr = new_graph.data_handle(); const IdxT graph_size = new_graph.extent(0); - auto pruned_graph = raft::make_host_matrix(graph_size, output_graph_degree); + auto pruned_graph = raft::make_host_matrix(graph_size, output_graph_degree); { // // Prune kNN graph // - auto d_input_graph = raft::make_device_matrix(res, graph_size, input_graph_degree); + auto d_input_graph = + raft::make_device_matrix(res, graph_size, input_graph_degree); - auto detour_count = raft::make_host_matrix(graph_size, input_graph_degree); + auto detour_count = raft::make_host_matrix(graph_size, input_graph_degree); auto d_detour_count = - raft::make_device_matrix(res, graph_size, input_graph_degree); + raft::make_device_matrix(res, graph_size, input_graph_degree); RAFT_CUDA_TRY(cudaMemsetAsync(d_detour_count.data_handle(), 0xff, graph_size * input_graph_degree * sizeof(uint8_t), resource::get_cuda_stream(res))); - auto d_num_no_detour_edges = raft::make_device_vector(res, graph_size); + auto d_num_no_detour_edges = raft::make_device_vector(res, graph_size); RAFT_CUDA_TRY(cudaMemsetAsync(d_num_no_detour_edges.data_handle(), 0x00, graph_size * sizeof(uint32_t), @@ -468,8 +469,8 @@ void optimize(raft::resources const& res, (double)num_full / graph_size * 100); } - auto rev_graph = raft::make_host_matrix(graph_size, output_graph_degree); - auto rev_graph_count = raft::make_host_vector(graph_size); + auto rev_graph = raft::make_host_matrix(graph_size, output_graph_degree); + auto rev_graph_count = raft::make_host_vector(graph_size); { // @@ -477,20 +478,21 @@ void optimize(raft::resources const& res, // const double time_make_start = cur_time(); - auto d_rev_graph = raft::make_device_matrix(res, graph_size, output_graph_degree); + auto d_rev_graph = + raft::make_device_matrix(res, graph_size, output_graph_degree); 
RAFT_CUDA_TRY(cudaMemsetAsync(d_rev_graph.data_handle(), 0xff, graph_size * output_graph_degree * sizeof(IdxT), resource::get_cuda_stream(res))); - auto d_rev_graph_count = raft::make_device_vector(res, graph_size); + auto d_rev_graph_count = raft::make_device_vector(res, graph_size); RAFT_CUDA_TRY(cudaMemsetAsync(d_rev_graph_count.data_handle(), 0x00, graph_size * sizeof(uint32_t), resource::get_cuda_stream(res))); - auto dest_nodes = raft::make_host_vector(graph_size); - auto d_dest_nodes = raft::make_device_vector(res, graph_size); + auto dest_nodes = raft::make_host_vector(graph_size); + auto d_dest_nodes = raft::make_device_vector(res, graph_size); for (uint64_t k = 0; k < output_graph_degree; k++) { #pragma omp parallel for diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh index bf6a32eac8..b8a05239be 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta.cuh @@ -178,8 +178,8 @@ struct search : public search_plan_impl { ~search() {} void operator()(raft::resources const& res, - raft::device_matrix_view dataset, - raft::device_matrix_view graph, + raft::device_matrix_view dataset, + raft::device_matrix_view graph, INDEX_T* const topk_indices_ptr, // [num_queries, topk] DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] const DATA_T* const queries_ptr, // [num_queries, dataset_dim] diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh index 3ccd73d92c..8f5cfa08ce 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-ext.cuh @@ -27,8 +27,8 @@ template -void select_and_run(raft::device_matrix_view dataset, - raft::device_matrix_view graph, +void select_and_run(raft::device_matrix_view 
dataset, + raft::device_matrix_view graph, INDEX_T* const topk_indices_ptr, DISTANCE_T* const topk_distances_ptr, const DATA_T* const queries_ptr, @@ -54,8 +54,8 @@ void select_and_run(raft::device_matrix_view( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh index 43e3e83f59..ad3012a9ae 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_cta_kernel-inl.cuh @@ -452,8 +452,8 @@ template void select_and_run( // raft::resources const& res, - raft::device_matrix_view dataset, - raft::device_matrix_view graph, + raft::device_matrix_view dataset, + raft::device_matrix_view graph, INDEX_T* const topk_indices_ptr, // [num_queries, topk] DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] const DATA_T* const queries_ptr, // [num_queries, dataset_dim] diff --git a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh index 033022aea1..4e1d7955f0 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_multi_kernel.cuh @@ -594,8 +594,8 @@ struct search : search_plan_impl { ~search() {} void operator()(raft::resources const& res, - raft::device_matrix_view dataset, - raft::device_matrix_view graph, + raft::device_matrix_view dataset, + raft::device_matrix_view graph, INDEX_T* const topk_indices_ptr, // [num_queries, topk] DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] const DATA_T* const queries_ptr, // [num_queries, dataset_dim] diff --git 
a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh index cbffd93caf..39e71733ce 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_plan.cuh @@ -105,8 +105,8 @@ struct search_plan_impl : public search_plan_impl_base { virtual ~search_plan_impl() {} virtual void operator()(raft::resources const& res, - raft::device_matrix_view dataset, - raft::device_matrix_view graph, + raft::device_matrix_view dataset, + raft::device_matrix_view graph, INDEX_T* const result_indices_ptr, // [num_queries, topk] DISTANCE_T* const result_distances_ptr, // [num_queries, topk] const DATA_T* const queries_ptr, // [num_queries, dataset_dim] diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh index bad2039f8c..a7576bb82b 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta.cuh @@ -226,8 +226,8 @@ struct search : search_plan_impl { } void operator()(raft::resources const& res, - raft::device_matrix_view dataset, - raft::device_matrix_view graph, + raft::device_matrix_view dataset, + raft::device_matrix_view graph, INDEX_T* const result_indices_ptr, // [num_queries, topk] DISTANCE_T* const result_distances_ptr, // [num_queries, topk] const DATA_T* const queries_ptr, // [num_queries, dataset_dim] diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh index b0130e45d4..a8715620e3 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-ext.cuh @@ -27,8 +27,8 @@ template void select_and_run( // raft::resources const& res, - raft::device_matrix_view dataset, - raft::device_matrix_view graph, 
+ raft::device_matrix_view dataset, + raft::device_matrix_view graph, INDEX_T* const topk_indices_ptr, // [num_queries, topk] DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] const DATA_T* const queries_ptr, // [num_queries, dataset_dim] @@ -57,8 +57,8 @@ void select_and_run( // raft::resources const& res, #define instantiate_single_cta_select_and_run( \ TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ extern template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh index ca2166ab8d..eeef4496ff 100644 --- a/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/search_single_cta_kernel-inl.cuh @@ -828,8 +828,8 @@ template void select_and_run( // raft::resources const& res, - raft::device_matrix_view dataset, - raft::device_matrix_view graph, + raft::device_matrix_view dataset, + raft::device_matrix_view graph, INDEX_T* const topk_indices_ptr, // [num_queries, topk] DISTANCE_T* const topk_distances_ptr, // [num_queries, topk] const DATA_T* const queries_ptr, // [num_queries, dataset_dim] diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py index 170c57c521..3203893ded 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_00_generate.py @@ -44,8 +44,8 @@ #define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \\ template void select_and_run( \\ - raft::device_matrix_view dataset, \\ - 
raft::device_matrix_view graph, \\ + raft::device_matrix_view dataset, \\ + raft::device_matrix_view graph, \\ INDEX_T* const topk_indices_ptr, \\ DISTANCE_T* const topk_distances_ptr, \\ const DATA_T* const queries_ptr, \\ @@ -81,12 +81,15 @@ # mxelem = [64, 128, 256] load_types = ["uint4"] search_types = dict( - float_uint32=("float", "uint32_t", "float"), # data_t, idx_t, distance_t + float_uint32=( + "float", + "uint32_t", + "float", + ), # data_t, vec_idx_t, distance_t int8_uint32=("int8_t", "uint32_t", "float"), uint8_uint32=("uint8_t", "uint32_t", "float"), float_uint64=("float", "uint64_t", "float"), ) - # knn for type_path, (data_t, idx_t, distance_t) in search_types.items(): for (mxdim, team) in mxdim_team: diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu index 207028dcec..3593f61f34 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim1024_t32.cu @@ -30,8 +30,8 @@ namespace raft::neighbors::experimental::cagra::detail::multi_cta_search { #define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu index 4a5c0f106b..adce4ce92f 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim128_t8.cu @@ -30,8 +30,8 @@ namespace 
raft::neighbors::experimental::cagra::detail::multi_cta_search { #define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu index 93a9f41881..29d29c726f 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim256_t16.cu @@ -30,8 +30,8 @@ namespace raft::neighbors::experimental::cagra::detail::multi_cta_search { #define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu index fb321b2cf7..22bd83e801 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint32_dim512_t32.cu @@ -30,8 +30,8 @@ namespace raft::neighbors::experimental::cagra::detail::multi_cta_search { #define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + 
raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu index e73698460d..9dfb2b6d6a 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim1024_t32.cu @@ -30,8 +30,8 @@ namespace raft::neighbors::experimental::cagra::detail::multi_cta_search { #define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu index e51fdcbc62..0d83a04a6e 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim128_t8.cu @@ -30,8 +30,8 @@ namespace raft::neighbors::experimental::cagra::detail::multi_cta_search { #define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu index 
caa45b5395..23831c9407 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim256_t16.cu @@ -30,8 +30,8 @@ namespace raft::neighbors::experimental::cagra::detail::multi_cta_search { #define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu index 67e54f0937..a090ff7d7a 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_float_uint64_dim512_t32.cu @@ -30,8 +30,8 @@ namespace raft::neighbors::experimental::cagra::detail::multi_cta_search { #define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu index 2e929eb4f0..d41ed6dfe8 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim1024_t32.cu @@ -30,8 +30,8 @@ namespace raft::neighbors::experimental::cagra::detail::multi_cta_search { #define 
instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu index d3e2e78250..572061a3d7 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim128_t8.cu @@ -30,8 +30,8 @@ namespace raft::neighbors::experimental::cagra::detail::multi_cta_search { #define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu index 802edafdf2..dc77e68171 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim256_t16.cu @@ -30,8 +30,8 @@ namespace raft::neighbors::experimental::cagra::detail::multi_cta_search { #define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const 
topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu index 96e91c475e..bbea0936c2 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_int8_uint32_dim512_t32.cu @@ -30,8 +30,8 @@ namespace raft::neighbors::experimental::cagra::detail::multi_cta_search { #define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu index 6db346c67a..8300b17446 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim1024_t32.cu @@ -30,8 +30,8 @@ namespace raft::neighbors::experimental::cagra::detail::multi_cta_search { #define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu index 4b1c6c89f4..a52f627d79 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim128_t8.cu @@ -30,8 +30,8 @@ namespace raft::neighbors::experimental::cagra::detail::multi_cta_search { #define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu index f978a9011a..ad4a86fa37 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim256_t16.cu @@ -30,8 +30,8 @@ namespace raft::neighbors::experimental::cagra::detail::multi_cta_search { #define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu index 390330ec93..62acdcb554 100644 --- a/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_multi_cta_uint8_uint32_dim512_t32.cu @@ -30,8 +30,8 @@ namespace raft::neighbors::experimental::cagra::detail::multi_cta_search { #define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, 
DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py b/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py index b8f623d4c4..466afa50e4 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_00_generate.py @@ -45,8 +45,8 @@ #define instantiate_single_cta_select_and_run( \\ TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \\ template void select_and_run( \\ - raft::device_matrix_view dataset, \\ - raft::device_matrix_view graph, \\ + raft::device_matrix_view dataset, \\ + raft::device_matrix_view graph, \\ INDEX_T* const topk_indices_ptr, \\ DISTANCE_T* const topk_distances_ptr, \\ const DATA_T* const queries_ptr, \\ diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu index 523f2761fc..39654d5fe6 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim1024_t32.cu @@ -31,8 +31,8 @@ namespace raft::neighbors::experimental::cagra::detail::single_cta_search { #define instantiate_single_cta_select_and_run( \ TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git 
a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu index cb8b21bfe8..934c548500 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim128_t8.cu @@ -31,8 +31,8 @@ namespace raft::neighbors::experimental::cagra::detail::single_cta_search { #define instantiate_single_cta_select_and_run( \ TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu index f5ccfa7572..fa534dbf24 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim256_t16.cu @@ -31,8 +31,8 @@ namespace raft::neighbors::experimental::cagra::detail::single_cta_search { #define instantiate_single_cta_select_and_run( \ TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu index 1d83979a88..7f158666bf 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu +++ 
b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint32_dim512_t32.cu @@ -31,8 +31,8 @@ namespace raft::neighbors::experimental::cagra::detail::single_cta_search { #define instantiate_single_cta_select_and_run( \ TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu index cd588e13ef..2245001657 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim1024_t32.cu @@ -31,8 +31,8 @@ namespace raft::neighbors::experimental::cagra::detail::single_cta_search { #define instantiate_single_cta_select_and_run( \ TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu index b47db68273..01cbf1d313 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim128_t8.cu @@ -31,8 +31,8 @@ namespace raft::neighbors::experimental::cagra::detail::single_cta_search { #define instantiate_single_cta_select_and_run( \ TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ 
template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu index d875080345..2586d94f2d 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim256_t16.cu @@ -31,8 +31,8 @@ namespace raft::neighbors::experimental::cagra::detail::single_cta_search { #define instantiate_single_cta_select_and_run( \ TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu index 848e71a645..082874f98a 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_float_uint64_dim512_t32.cu @@ -31,8 +31,8 @@ namespace raft::neighbors::experimental::cagra::detail::single_cta_search { #define instantiate_single_cta_select_and_run( \ TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const 
queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu index de7acb56fe..741d753f7c 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim1024_t32.cu @@ -31,8 +31,8 @@ namespace raft::neighbors::experimental::cagra::detail::single_cta_search { #define instantiate_single_cta_select_and_run( \ TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu index d0e90603e2..4d609df2e4 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim128_t8.cu @@ -31,8 +31,8 @@ namespace raft::neighbors::experimental::cagra::detail::single_cta_search { #define instantiate_single_cta_select_and_run( \ TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu index 26764c5ad9..cb345489ee 100644 --- 
a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim256_t16.cu @@ -31,8 +31,8 @@ namespace raft::neighbors::experimental::cagra::detail::single_cta_search { #define instantiate_single_cta_select_and_run( \ TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu index 6568ab6dba..91245a4913 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_int8_uint32_dim512_t32.cu @@ -31,8 +31,8 @@ namespace raft::neighbors::experimental::cagra::detail::single_cta_search { #define instantiate_single_cta_select_and_run( \ TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu index 311f42c9a7..f065e46a9a 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim1024_t32.cu @@ -31,8 +31,8 @@ namespace raft::neighbors::experimental::cagra::detail::single_cta_search { #define 
instantiate_single_cta_select_and_run( \ TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu index 197aa71d7b..eb4e799420 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim128_t8.cu @@ -31,8 +31,8 @@ namespace raft::neighbors::experimental::cagra::detail::single_cta_search { #define instantiate_single_cta_select_and_run( \ TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu index dfb47a1137..4a5ad64d14 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim256_t16.cu @@ -31,8 +31,8 @@ namespace raft::neighbors::experimental::cagra::detail::single_cta_search { #define instantiate_single_cta_select_and_run( \ TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ 
INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu index 1b874bcf9b..17fe531016 100644 --- a/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu +++ b/cpp/src/neighbors/detail/cagra/search_single_cta_uint8_uint32_dim512_t32.cu @@ -31,8 +31,8 @@ namespace raft::neighbors::experimental::cagra::detail::single_cta_search { #define instantiate_single_cta_select_and_run( \ TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh index 9969bfd7c1..78a2d08496 100644 --- a/cpp/test/neighbors/ann_cagra.cuh +++ b/cpp/test/neighbors/ann_cagra.cuh @@ -45,7 +45,7 @@ namespace raft::neighbors::experimental::cagra { namespace { // For sort_knn_graph test template -void RandomSuffle(raft::host_matrix_view index) +void RandomSuffle(raft::host_matrix_view index) { for (IdxT i = 0; i < index.extent(0); i++) { uint64_t rand = i; @@ -65,8 +65,8 @@ void RandomSuffle(raft::host_matrix_view index) } template -testing::AssertionResult CheckOrder(raft::host_matrix_view index_test, - raft::host_matrix_view dataset) +testing::AssertionResult CheckOrder(raft::host_matrix_view index_test, + raft::host_matrix_view dataset) { for (IdxT i = 0; i < index_test.extent(0); i++) { const DatatT* const base_vec = dataset.data_handle() + i * dataset.extent(1); @@ -203,15 +203,15 @@ class AnnCagraTest : public ::testing::TestWithParam { search_params.max_queries = ps.max_queries; 
search_params.team_size = ps.team_size; - auto database_view = raft::make_device_matrix_view( + auto database_view = raft::make_device_matrix_view( (const DataT*)database.data(), ps.n_rows, ps.dim); { cagra::index index(handle_); if (ps.host_dataset) { - auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); + auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); raft::copy(database_host.data_handle(), database.data(), database.size(), stream_); - auto database_host_view = raft::make_host_matrix_view( + auto database_host_view = raft::make_host_matrix_view( (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim); index = cagra::build(handle_, index_params, database_host_view); } else { @@ -221,12 +221,12 @@ class AnnCagraTest : public ::testing::TestWithParam { } auto index = cagra::deserialize(handle_, "cagra_index"); - auto search_queries_view = raft::make_device_matrix_view( + auto search_queries_view = raft::make_device_matrix_view( search_queries.data(), ps.n_queries, ps.dim); auto indices_out_view = - raft::make_device_matrix_view(indices_dev.data(), ps.n_queries, ps.k); - auto dists_out_view = - raft::make_device_matrix_view(distances_dev.data(), ps.n_queries, ps.k); + raft::make_device_matrix_view(indices_dev.data(), ps.n_queries, ps.k); + auto dists_out_view = raft::make_device_matrix_view( + distances_dev.data(), ps.n_queries, ps.k); cagra::search( handle_, search_params, index, search_queries_view, indices_out_view, dists_out_view); @@ -234,6 +234,7 @@ class AnnCagraTest : public ::testing::TestWithParam { update_host(indices_Cagra.data(), indices_dev.data(), queries_size, stream_); resource::sync_stream(handle_); } + // for (int i = 0; i < min(ps.n_queries, 10); i++) { // // std::cout << "query " << i << std::end; // print_vector("T", indices_naive.data() + i * ps.k, ps.k, std::cout); @@ -307,17 +308,17 @@ class AnnCagraSortTest : public ::testing::TestWithParam { { { // Step 1: Build a sorted KNN graph by CAGRA knn build - auto 
database_view = raft::make_device_matrix_view( + auto database_view = raft::make_device_matrix_view( (const DataT*)database.data(), ps.n_rows, ps.dim); - auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); + auto database_host = raft::make_host_matrix(ps.n_rows, ps.dim); raft::copy( database_host.data_handle(), database.data(), database.size(), handle_.get_stream()); - auto database_host_view = raft::make_host_matrix_view( + auto database_host_view = raft::make_host_matrix_view( (const DataT*)database_host.data_handle(), ps.n_rows, ps.dim); cagra::index_params index_params; auto knn_graph = - raft::make_host_matrix(ps.n_rows, index_params.intermediate_graph_degree); + raft::make_host_matrix(ps.n_rows, index_params.intermediate_graph_degree); if (ps.host_dataset) { cagra::build_knn_graph(handle_, database_host_view, knn_graph.view()); @@ -365,7 +366,6 @@ class AnnCagraSortTest : public ::testing::TestWithParam { inline std::vector generate_inputs() { - // Todo(tfeher): MULTI_CTA tests a bug, consider disabling that mode. 
// TODO(tfeher): test MULTI_CTA kernel with num_Parents>1 to allow multiple CTA per queries std::vector inputs = raft::util::itertools::product( {100}, diff --git a/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh b/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh index 562e5ac2ca..077aef5202 100644 --- a/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh +++ b/cpp/test/neighbors/ann_cagra/search_kernel_uint64_t.cuh @@ -22,8 +22,8 @@ namespace raft::neighbors::experimental::cagra::detail { namespace multi_cta_search { #define instantiate_kernel_selection(TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ extern template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ @@ -59,8 +59,8 @@ namespace single_cta_search { #define instantiate_single_cta_select_and_run( \ TEAM_SIZE, MAX_DATASET_DIM, DATA_T, INDEX_T, DISTANCE_T) \ extern template void select_and_run( \ - raft::device_matrix_view dataset, \ - raft::device_matrix_view graph, \ + raft::device_matrix_view dataset, \ + raft::device_matrix_view graph, \ INDEX_T* const topk_indices_ptr, \ DISTANCE_T* const topk_distances_ptr, \ const DATA_T* const queries_ptr, \ From 2a67251413f72e021472b2d590a33c13fa626686 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Wed, 26 Jul 2023 16:39:30 +0200 Subject: [PATCH 02/57] wip --- cpp/bench/ann/CMakeLists.txt | 3 + cpp/bench/ann/src/raft/orig_cagra.cu | 22 + cpp/bench/ann/src/raft/orig_cagra_wrapper.h | 330 +++++++++ cpp/bench/ann/src/raft/raft_benchmark.cu | 63 ++ cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 99 ++- cpp/bench/prims/CMakeLists.txt | 142 ++-- cpp/include/raft/neighbors/cagra_types.hpp | 8 +- .../neighbors/detail/cagra/cagra_build.cuh | 1 + cpp/include/raft/util/cache_util.cuh | 2 +- 
cpp/include/raft/util/integer_utils.hpp | 2 +- cpp/test/CMakeLists.txt | 658 +++++++++--------- 11 files changed, 936 insertions(+), 394 deletions(-) create mode 100644 cpp/bench/ann/src/raft/orig_cagra.cu create mode 100644 cpp/bench/ann/src/raft/orig_cagra_wrapper.h diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 6977d77684..eb95cceea4 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -172,9 +172,12 @@ if(RAFT_ANN_BENCH_USE_RAFT_CAGRA) PATH bench/ann/src/raft/raft_benchmark.cu $<$:bench/ann/src/raft/raft_cagra.cu> + $<$:bench/ann/src/raft/orig_cagra.cu> LINKS raft::compiled ) + target_compile_options(RAFT_CAGRA_ANN_BENCH PUBLIC -I/workspace/rapids/knn/cagra/include) + target_link_libraries(RAFT_CAGRA_ANN_BENCH PUBLIC cagra) endif() if(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT) diff --git a/cpp/bench/ann/src/raft/orig_cagra.cu b/cpp/bench/ann/src/raft/orig_cagra.cu new file mode 100644 index 0000000000..f68d67dc22 --- /dev/null +++ b/cpp/bench/ann/src/raft/orig_cagra.cu @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "orig_cagra_wrapper.h" + +namespace raft::bench::ann { +template class Cagra; +template class Cagra; +template class Cagra; +} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/raft/orig_cagra_wrapper.h b/cpp/bench/ann/src/raft/orig_cagra_wrapper.h new file mode 100644 index 0000000000..e34a35b5f0 --- /dev/null +++ b/cpp/bench/ann/src/raft/orig_cagra_wrapper.h @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include +#include +#include +#include + +#include +// #include "cuann/ann.h" + +#include "../common/ann_types.hpp" +#include "raft_ann_bench_utils.h" +#include +#include +#include +#include +#include +#include + +namespace raft::bench::ann { + +namespace { +template +std::string get_cagra_dtype() +{ + if constexpr (std::is_same_v) { + return "float"; + } else if constexpr (std::is_same_v) { + return "uint8"; + } else if constexpr (std::is_same_v) { + return "int8"; + } else if constexpr (sizeof(T) == 2) { + return "half"; + } else { + static_assert(!std::is_same_v, "Cagra: type should be float/half/int8/uint8"); + } + return ""; // stop warning of missing return statement +} + +} // namespace + +template +class Cagra : public ANN { + public: + struct BuildParam {}; + + using typename ANN::AnnSearchParam; + struct SearchParam : public AnnSearchParam { + raft::neighbors::experimental::cagra::search_params p; + std::string search_mode; // "single-cta", "multi-cta", or "multi-kernel" + int batch_size; + int k; + }; + + Cagra(Metric metric, int dim, const BuildParam&) : ANN(metric, dim) {} + Cagra(const Cagra&) = delete; + const Cagra& operator=(const Cagra&) = delete; + ~Cagra(); + + void build(const T* dataset, size_t nrow, cudaStream_t stream = 0) override; + + void set_search_param(const AnnSearchParam& param) override; + + void search(const T* queries, + int batch_size, + int k, + size_t* neighbors, + float* distances, + cudaStream_t stream = 0) const override; + + void save(const std::string& file) const override; + void load(const std::string& file) override; + + AlgoProperty get_property() const override + { + AlgoProperty property; + property.dataset_memory_type = MemoryType::Device; + property.query_memory_type = MemoryType::Device; + property.need_dataset_when_search = true; + return property; + } + + void set_search_dataset(const T* dataset, size_t nrow) override + { + dataset_ = dataset; + if (nrow_ == 0) { + nrow_ = nrow; + 
} else if (nrow_ != nrow) { + throw std::runtime_error("inconsistent nrow between dataset and graph"); + } + }; + + private: + raft::device_resources handle_; + + void check_search_param_(SearchParam& param); + + using ANN::dim_; + SearchParam search_param_; + void* plan_; + + const T* dataset_{nullptr}; + size_t nrow_{0}; + INDEX_T* graph_{nullptr}; + size_t degree_{0}; + + INDEX_T* tmp_neighbors_{nullptr}; +}; + +template +Cagra::~Cagra() +{ + if (plan_) { destroy_plan(plan_); } + RAFT_CUDA_TRY_NO_THROW(cudaFree(graph_)); + RAFT_CUDA_TRY_NO_THROW(cudaFree(tmp_neighbors_)); +} + +template +void Cagra::build(const T*, size_t, cudaStream_t) +{ + throw std::runtime_error("Cagra's build() is not available now, use its tools to build index"); +} + +// // from cagra/tools/cagra_search.cu +// template +// void Cagra::check_search_param_(SearchParam& param) +// { +// if (param.search_mode != "single-cta" && param.search_mode != "multi-cta" && +// param.search_mode != "multi-kernel") { +// throw std::runtime_error("Cagra: illegal search_mode: '" + param.search_mode + "'"); +// } + +// if (param.team_size != 0 && param.team_size != 4 && param.team_size != 8 && +// param.team_size != 16 && param.team_size != 32) { +// throw std::runtime_error("Cagra: team_size must be 0, 4, 8, 16 or 32. 
" + +// std::to_string(param.team_size) + " has been given.\n"); +// } + +// if (param.internal_k < static_cast(param.k)) { +// throw std::runtime_error("Cagra: internal_k must >= k"); +// } +// if (param.internal_k % 32) { +// throw std::runtime_error("Cagra: internal_k must be multiple of 32"); +// } +// if (param.internal_k > 1024 && param.search_mode != "multi-cta") { +// throw std::runtime_error("Cagra: internal_k must <= 1024 unless in multi-cta mode"); +// } + +// if (param.max_iterations == 0) { +// if (param.search_mode == "multi-cta") { +// param.max_iterations = 1 + std::min(32 * 1.1, 32 + 10.0); +// } else { +// param.max_iterations = 1 + std::min((param.internal_k / param.search_width) * 1.1, +// (param.internal_k / param.search_width) + 10.0); +// } +// } +// if (param.max_iterations < param.min_iterations) { param.max_iterations = param.min_iterations; +// } + +// if (param.search_mode == "multi-cta") { +// int mc_num_cta_per_query = std::max(param.search_width, param.internal_k / 32); +// if (mc_num_cta_per_query * 32 < param.k) { +// throw std::runtime_error("mc_num_cta_per_query (" + std::to_string(mc_num_cta_per_query) + +// ") * 32 must be >= k (" + std::to_string(param.k) + +// ") when search_mode is multi-cta"); +// } +// } +// } + +template +void Cagra::set_search_param(const AnnSearchParam& param) +{ + if (!dataset_ || nrow_ == 0) { throw std::runtime_error("Cagra: dataset is not loaded"); } + if (!graph_ || degree_ == 0) { throw std::runtime_error("Cagra: index is not loaded"); } + + auto new_search_param = dynamic_cast(param); + // check_search_param_(new_search_param); + + // if (new_search_param.search_mode != search_param_.search_mode || + // new_search_param.batch_size != search_param_.batch_size || + // new_search_param.k != search_param_.k || + // new_search_param.team_size != search_param_.team_size || + // new_search_param.internal_k != search_param_.internal_k || + // new_search_param.search_width != search_param_.search_width || 
+ // new_search_param.min_iterations != search_param_.min_iterations || + // new_search_param.max_iterations != search_param_.max_iterations) { + if (plan_) { destroy_plan(plan_); } + + if (new_search_param.batch_size != search_param_.batch_size || + new_search_param.k != search_param_.k) { + RAFT_CUDA_TRY(cudaFree(tmp_neighbors_)); + RAFT_CUDA_TRY(cudaMalloc(&tmp_neighbors_, + sizeof(size_t) * new_search_param.batch_size * new_search_param.k)); + } + search_param_ = new_search_param; + + create_plan(&plan_, + get_cagra_dtype(), + 0, // team_size + search_param_.search_mode, + search_param_.k, + search_param_.p.itopk_size, + search_param_.p.num_parents, + search_param_.p.min_iterations, + search_param_.p.max_iterations, + search_param_.batch_size, + 0, // load_bit_length + 0, // thread_block_size + search_param_.search_mode == "multi-cta" ? "hash" : "auto", // hashmap_mode + 0, // hashmap_min_bitlen + 0.5, // hashmap_max_fill_rate + nrow_, + dim_, + degree_, + dataset_, + graph_); +} +//} + +template +void Cagra::search(const T* queries, + int batch_size, + int k, + size_t* neighbors, + float* distances, + cudaStream_t stream) const +{ + static_assert(std::is_same_v); + assert(plan_); + + if (k != search_param_.k) { + throw std::runtime_error("wrong configuration: k (" + std::to_string(k) + + ") != search_param.k (" + std::to_string(search_param_.k) + ")"); + } + if (batch_size > search_param_.batch_size) { + throw std::runtime_error("wrong configuration: batch_size (" + std::to_string(batch_size) + + ") > search_param.batch_size (" + + std::to_string(search_param_.batch_size) + ")"); + } + + // uint32_t neighbors_ptr = std::is_same::value ? 
tmp_neighbors_ + + ::search(plan_, + tmp_neighbors_, + distances, + queries, + batch_size, + 1, + 0x128394, + nullptr, + 0, + nullptr, + stream); + + raft::linalg::unaryOp(neighbors, + tmp_neighbors_, + batch_size * k, + raft::cast_op(), + resource::get_cuda_stream(handle_)); +} + +template +void Cagra::save(const std::string& file) const +{ + FILE* fp = fopen(file.c_str(), "w"); + if (!fp) { throw std::runtime_error("fail to open " + file + " for writing"); } + + if (fwrite(&nrow_, sizeof(nrow_), 1, fp) != 1) { + throw std::runtime_error("fwrite() " + file + " failed"); + } + if (fwrite(&degree_, sizeof(degree_), 1, fp) != 1) { + throw std::runtime_error("fwrite() " + file + " failed"); + } + + size_t total = nrow_ * degree_; + auto h_graph = new INDEX_T[total]; + RAFT_CUDA_TRY(cudaMemcpy(h_graph, graph_, sizeof(*graph_) * total, cudaMemcpyDeviceToHost)); + if (fwrite(h_graph, sizeof(*h_graph), total, fp) != total) { + throw std::runtime_error("fwrite() " + file + " failed"); + } + delete[] h_graph; +} + +template +void Cagra::load(const std::string& file) +{ + FILE* fp = fopen(file.c_str(), "r"); + if (!fp) { throw std::runtime_error("fail to open " + file); } + + size_t nrow; + if (fread(&nrow, sizeof(nrow), 1, fp) != 1) { + throw std::runtime_error("fread() " + file + " failed"); + } + if (nrow_ == 0) { + nrow_ = nrow; + } else if (nrow_ != nrow) { + throw std::runtime_error("inconsistent nrow between dataset and graph"); + } + + if (fread(&degree_, sizeof(degree_), 1, fp) != 1) { + throw std::runtime_error("fread() " + file + " failed"); + } + + size_t total = nrow_ * degree_; + auto h_graph = new INDEX_T[total]; + if (fread(h_graph, sizeof(*h_graph), total, fp) != total) { + throw std::runtime_error("fread() " + file + " failed"); + } + RAFT_CUDA_TRY(cudaMalloc(&graph_, sizeof(*graph_) * total)); + RAFT_CUDA_TRY(cudaMemcpy(graph_, h_graph, sizeof(*graph_) * total, cudaMemcpyHostToDevice)); + delete[] h_graph; +} + +} // namespace raft::bench::ann diff --git 
a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index b43f52eb5c..772cc49873 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -45,6 +45,10 @@ extern template class raft::bench::ann::RaftIvfPQ; extern template class raft::bench::ann::RaftCagra; extern template class raft::bench::ann::RaftCagra; extern template class raft::bench::ann::RaftCagra; +#include "orig_cagra_wrapper.h" +extern template class raft::bench::ann::Cagra; +extern template class raft::bench::ann::Cagra; +extern template class raft::bench::ann::Cagra; #endif #define JSON_DIAGNOSTICS 1 #include @@ -144,6 +148,60 @@ void parse_search_param(const nlohmann::json& conf, if (conf.contains("itopk")) { param.p.itopk_size = conf.at("itopk"); } if (conf.contains("search_width")) { param.p.num_parents = conf.at("search_width"); } if (conf.contains("max_iterations")) { param.p.max_iterations = conf.at("max_iterations"); } + if (conf.contains("algo")) { + if (conf.at("algo") == "single_cta") { + param.p.algo = raft::neighbors::experimental::cagra::search_algo::SINGLE_CTA; + } else if (conf.at("algo") == "multi_cta") { + param.p.algo = raft::neighbors::experimental::cagra::search_algo::MULTI_CTA; + } else if (conf.at("algo") == "multi_kernel") { + param.p.algo = raft::neighbors::experimental::cagra::search_algo::MULTI_KERNEL; + } else if (conf.at("algo") == "auto") { + param.p.algo = raft::neighbors::experimental::cagra::search_algo::AUTO; + } else { + std::string tmp = conf.at("algo"); + THROW("Invalid value for algo: %s", tmp.c_str()); + } + } +} +template +void parse_build_param(const nlohmann::json& conf, + typename raft::bench::ann::Cagra::BuildParam& param) +{ + // if (conf.contains("index_dim")) { + // param.graph_degree = conf.at("index_dim"); + // param.intermediate_graph_degree = param.graph_degree * 2; + // } + // if (conf.contains("intermediate_graph_degree")) { + // param.intermediate_graph_degree = 
conf.at("intermediate_graph_degree"); + // } +} + +template +void parse_search_param(const nlohmann::json& conf, + typename raft::bench::ann::Cagra::SearchParam& param) +{ + param.k = conf.at("k"); + param.batch_size = conf.at("batch_size"); + if (conf.contains("itopk")) { param.p.itopk_size = conf.at("itopk"); } + if (conf.contains("search_width")) { param.p.num_parents = conf.at("search_width"); } + if (conf.contains("max_iterations")) { param.p.max_iterations = conf.at("max_iterations"); } + if (conf.contains("algo")) { + if (conf.at("algo") == "single_cta") { + param.p.algo = raft::neighbors::experimental::cagra::search_algo::SINGLE_CTA; + param.search_mode = "single-cta"; + } else if (conf.at("algo") == "multi_cta") { + param.p.algo = raft::neighbors::experimental::cagra::search_algo::MULTI_CTA; + param.search_mode = "multi-cta"; + } else if (conf.at("algo") == "multi_kernel") { + param.p.algo = raft::neighbors::experimental::cagra::search_algo::MULTI_KERNEL; + param.search_mode = "multi-kernel"; + } else if (conf.at("algo") == "auto") { + param.p.algo = raft::neighbors::experimental::cagra::search_algo::AUTO; + } else { + std::string tmp = conf.at("algo"); + THROW("Invalid value for algo: %s", tmp.c_str()); + } + } } #endif @@ -190,6 +248,11 @@ std::unique_ptr> create_algo(const std::string& algo, parse_build_param(conf, param); ann = std::make_unique>(metric, dim, param); } + if (algo == "cagra") { + typename raft::bench::ann::Cagra::BuildParam param; + parse_build_param(conf, param); + ann = std::make_unique>(metric, dim, param); + } #endif if (!ann) { throw std::runtime_error("invalid algo: '" + algo + "'"); } diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index b9501280d2..bfd25cf547 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -19,10 +19,13 @@ #include #include #include +#include #include #include #include #include +#include +#include 
#include #include #include @@ -79,10 +82,14 @@ class RaftCagra : public ANN { void save(const std::string& file) const override; void load(const std::string&) override; + void set_search_dataset(const T* dataset, size_t nrow) override; + private: + std::shared_ptr stream_pool; raft::device_resources handle_; BuildParam index_params_; raft::neighbors::cagra::search_params search_params_; + raft::device_matrix graph_; std::optional> index_; int device_; int dimension_; @@ -92,9 +99,12 @@ class RaftCagra : public ANN { template RaftCagra::RaftCagra(Metric metric, int dim, const BuildParam& param) : ANN(metric, dim), + stream_pool(std::make_shared(2)), + handle_(rmm::cuda_stream_default, stream_pool), index_params_(param), dimension_(dim), - mr_(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull) + mr_(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull), + graph_(make_device_matrix(handle_, 0, 0)) { rmm::mr::set_current_device_resource(&mr_); index_params_.metric = parse_metric_type(metric); @@ -127,15 +137,96 @@ void RaftCagra::set_search_param(const AnnSearchParam& param) template void RaftCagra::save(const std::string& file) const { - raft::neighbors::cagra::serialize(handle_, file, *index_); + // 1 orig serialization + // raft::neighbors::cagra::serialize(handle_, file, *index_); + + // 2. 
Saving only knn graph + // std::ofstream of(file, std::ios::out | std::ios::binary); + // serialize_mdspan(handle_, of, index_->graph()); + // of.close(); + + size_t degree = index_->graph_degree(); + std::cout << "Saving knn graph" << std::endl; + for (int i = 0; i < std::min(index_->size(), 10); i++) { + print_vector("k", index_->graph().data_handle() + i * degree, degree, std::cout); + } + + // Orig CAGRA type of serialization + std::ofstream of(file, std::ios::out | std::ios::binary); + std::size_t size = index_->size(); + // std::size_t degree = index_->graph_degree(); + + of.write(reinterpret_cast(&size), sizeof(size)); + of.write(reinterpret_cast(&degree), sizeof(degree)); + + auto graph_h = make_host_matrix(size, degree); + raft::copy(graph_h.data_handle(), + index_->graph().data_handle(), + index_->graph().size(), + resource::get_cuda_stream(handle_)); + resource::sync_stream(handle_); + + of.write(reinterpret_cast(graph_h.data_handle()), graph_h.size() * sizeof(IdxT)); + + of.close(); return; } template void RaftCagra::load(const std::string& file) { - index_ = raft::neighbors::cagra::deserialize(handle_, file); - return; + // 1. Original index saving method + // index_ = raft::neighbors::cagra::deserialize(handle_, file); + + // // 2. read only knn_graph + // std::ifstream is(file, std::ios::in | std::ios::binary); + // raft::detail::numpy_serializer::header_t header = + // raft::detail::numpy_serializer::read_header(is); is.seekg(0); /* rewind*/ graph_ = + // make_device_matrix(handle_, header.shape[0], header.shape[1]); + // deserialize_mdspan(handle_, is, graph_.view()); + // is.close(); + + // 3. 
Cagra's knn file format + std::ifstream ifs(file, std::ios::in | std::ios::binary); + if (!ifs) { + throw std::runtime_error("File not exist : " + file + " (`" + __func__ + "` in " + __FILE__ + + ")"); + } + + std::size_t size, degree; + + ifs.read(reinterpret_cast(&size), sizeof(size)); + ifs.read(reinterpret_cast(&degree), sizeof(degree)); + + auto graph_h = make_host_matrix(size, degree); + graph_ = make_device_matrix(handle_, size, degree); + + for (std::size_t i = 0; i < size; i++) { + ifs.read(reinterpret_cast(graph_h.data_handle() + i * degree), sizeof(IdxT) * degree); + } + ifs.close(); + raft::copy( + graph_.data_handle(), graph_h.data_handle(), graph_.size(), resource::get_cuda_stream(handle_)); + resource::sync_stream(handle_); + + std::cout << "Loading knn graph" << std::endl; + for (int i = 0; i < std::min(graph_.extent(0), 10); i++) { + print_vector("k", graph_.data_handle() + i * degree, degree, std::cout); + } +} + +template +void RaftCagra::set_search_dataset(const T* dataset, size_t nrow) +{ + std::cout << "Creating dataset view " << nrow << "x" << this->dim_ << std::endl; + auto dataset_v = raft::make_host_matrix_view(dataset, nrow, this->dim_); + index_.emplace( + handle_, parse_metric_type(this->metric_), dataset_v, make_const_mdspan(graph_.view())); + size_t degree = index_->graph_degree(); + std::cout << "Restored index" << std::endl; + for (int i = 0; i < std::min(index_->size(), 10); i++) { + print_vector("k", index_->graph().data_handle() + i * degree, degree, std::cout); + } } template diff --git a/cpp/bench/prims/CMakeLists.txt b/cpp/bench/prims/CMakeLists.txt index e8d4739384..4a26b5bb45 100644 --- a/cpp/bench/prims/CMakeLists.txt +++ b/cpp/bench/prims/CMakeLists.txt @@ -73,83 +73,83 @@ function(ConfigureBench) endfunction() if(BUILD_PRIMS_BENCH) - ConfigureBench( - NAME CLUSTER_BENCH PATH bench/prims/cluster/kmeans_balanced.cu bench/prims/cluster/kmeans.cu - bench/prims/main.cpp OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY - ) - - 
ConfigureBench( - NAME TUNE_DISTANCE PATH bench/prims/distance/tune_pairwise/kernel.cu - bench/prims/distance/tune_pairwise/bench.cu bench/prims/main.cpp - ) - - ConfigureBench( - NAME - DISTANCE_BENCH - PATH - bench/prims/distance/distance_cosine.cu - bench/prims/distance/distance_exp_l2.cu - bench/prims/distance/distance_l1.cu - bench/prims/distance/distance_unexp_l2.cu - bench/prims/distance/fused_l2_nn.cu - bench/prims/distance/masked_nn.cu - bench/prims/distance/kernels.cu - bench/prims/main.cpp - OPTIONAL - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureBench( - NAME - LINALG_BENCH - PATH - bench/prims/linalg/add.cu - bench/prims/linalg/map_then_reduce.cu - bench/prims/linalg/matrix_vector_op.cu - bench/prims/linalg/norm.cu - bench/prims/linalg/normalize.cu - bench/prims/linalg/reduce_cols_by_key.cu - bench/prims/linalg/reduce_rows_by_key.cu - bench/prims/linalg/reduce.cu - bench/prims/main.cpp - ) - - ConfigureBench( - NAME - MATRIX_BENCH - PATH - bench/prims/matrix/argmin.cu - bench/prims/matrix/gather.cu - bench/prims/matrix/select_k.cu - bench/prims/matrix/main.cpp - OPTIONAL - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureBench( - NAME RANDOM_BENCH PATH bench/prims/random/make_blobs.cu bench/prims/random/permute.cu - bench/prims/random/rng.cu bench/prims/main.cpp - ) - - ConfigureBench(NAME SPARSE_BENCH PATH bench/prims/sparse/convert_csr.cu bench/prims/main.cpp) + # ConfigureBench( + # NAME CLUSTER_BENCH PATH bench/prims/cluster/kmeans_balanced.cu bench/prims/cluster/kmeans.cu + # bench/prims/main.cpp OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureBench( + # NAME TUNE_DISTANCE PATH bench/prims/distance/tune_pairwise/kernel.cu + # bench/prims/distance/tune_pairwise/bench.cu bench/prims/main.cpp + # ) + + # ConfigureBench( + # NAME + # DISTANCE_BENCH + # PATH + # bench/prims/distance/distance_cosine.cu + # bench/prims/distance/distance_exp_l2.cu + # bench/prims/distance/distance_l1.cu + # bench/prims/distance/distance_unexp_l2.cu + # 
bench/prims/distance/fused_l2_nn.cu + # bench/prims/distance/masked_nn.cu + # bench/prims/distance/kernels.cu + # bench/prims/main.cpp + # OPTIONAL + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureBench( + # NAME + # LINALG_BENCH + # PATH + # bench/prims/linalg/add.cu + # bench/prims/linalg/map_then_reduce.cu + # bench/prims/linalg/matrix_vector_op.cu + # bench/prims/linalg/norm.cu + # bench/prims/linalg/normalize.cu + # bench/prims/linalg/reduce_cols_by_key.cu + # bench/prims/linalg/reduce_rows_by_key.cu + # bench/prims/linalg/reduce.cu + # bench/prims/main.cpp + # ) + + # ConfigureBench( + # NAME + # MATRIX_BENCH + # PATH + # bench/prims/matrix/argmin.cu + # bench/prims/matrix/gather.cu + # bench/prims/matrix/select_k.cu + # bench/prims/matrix/main.cpp + # OPTIONAL + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureBench( + # NAME RANDOM_BENCH PATH bench/prims/random/make_blobs.cu bench/prims/random/permute.cu + # bench/prims/random/rng.cu bench/prims/main.cpp + # ) + + # ConfigureBench(NAME SPARSE_BENCH PATH bench/prims/sparse/convert_csr.cu bench/prims/main.cpp) ConfigureBench( NAME NEIGHBORS_BENCH PATH - bench/prims/neighbors/knn/brute_force_float_int64_t.cu - bench/prims/neighbors/knn/brute_force_float_uint32_t.cu + # bench/prims/neighbors/knn/brute_force_float_int64_t.cu + # bench/prims/neighbors/knn/brute_force_float_uint32_t.cu bench/prims/neighbors/knn/cagra_float_uint32_t.cu - bench/prims/neighbors/knn/ivf_flat_float_int64_t.cu - bench/prims/neighbors/knn/ivf_flat_int8_t_int64_t.cu - bench/prims/neighbors/knn/ivf_flat_uint8_t_int64_t.cu - bench/prims/neighbors/knn/ivf_pq_float_int64_t.cu - bench/prims/neighbors/knn/ivf_pq_int8_t_int64_t.cu - bench/prims/neighbors/knn/ivf_pq_uint8_t_int64_t.cu - bench/prims/neighbors/refine_float_int64_t.cu - bench/prims/neighbors/refine_uint8_t_int64_t.cu + # bench/prims/neighbors/knn/ivf_flat_float_int64_t.cu + # bench/prims/neighbors/knn/ivf_flat_int8_t_int64_t.cu + # 
bench/prims/neighbors/knn/ivf_flat_uint8_t_int64_t.cu + # bench/prims/neighbors/knn/ivf_pq_float_int64_t.cu + # bench/prims/neighbors/knn/ivf_pq_int8_t_int64_t.cu + # bench/prims/neighbors/knn/ivf_pq_uint8_t_int64_t.cu + # bench/prims/neighbors/refine_float_int64_t.cu + # bench/prims/neighbors/refine_uint8_t_int64_t.cu bench/prims/main.cpp OPTIONAL LIB diff --git a/cpp/include/raft/neighbors/cagra_types.hpp b/cpp/include/raft/neighbors/cagra_types.hpp index 130c7d70c8..375b5c5a0e 100644 --- a/cpp/include/raft/neighbors/cagra_types.hpp +++ b/cpp/include/raft/neighbors/cagra_types.hpp @@ -26,7 +26,6 @@ #include #include #include -#include #include #include @@ -113,7 +112,6 @@ static_assert(std::is_aggregate_v); */ template struct index : ann::index { - using AlignDim = raft::Pow2<16 / sizeof(T)>; static_assert(!raft::is_narrowing_v, "IdxT must be able to represent all values of uint32_t"); @@ -252,7 +250,7 @@ struct index : ann::index { void update_dataset(raft::resources const& res, raft::device_matrix_view dataset) { - if (dataset.extent(1) % AlignDim::Value != 0) { + if (dataset.extent(1) * sizeof(T) % 16 != 0) { RAFT_LOG_DEBUG("Creating a padded copy of CAGRA dataset in device memory"); copy_padded(res, dataset); } else { @@ -308,8 +306,8 @@ struct index : ann::index { void copy_padded(raft::resources const& res, mdspan, row_major, data_accessor> dataset) { - dataset_ = - make_device_matrix(res, dataset.extent(0), AlignDim::roundUp(dataset.extent(1))); + size_t padded_dim = round_up_safe(dataset.extent(1) * sizeof(T), 16) / sizeof(T); + dataset_ = make_device_matrix(res, dataset.extent(0), padded_dim); if (dataset_.extent(1) == dataset.extent(1)) { raft::copy(dataset_.data_handle(), dataset.data_handle(), diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh index 738be62e48..6e1b79da71 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh +++ 
b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh @@ -148,6 +148,7 @@ void build_knn_graph(raft::resources const& res, distances.data_handle(), batch.size(), distances.extent(1)); ivf_pq::search(res, *search_params, index, queries_view, neighbors_view, distances_view); + if constexpr (is_host_mdspan_v) { raft::copy(neighbors_host.data_handle(), neighbors.data_handle(), diff --git a/cpp/include/raft/util/cache_util.cuh b/cpp/include/raft/util/cache_util.cuh index bbd84d8bf2..e305f6053b 100644 --- a/cpp/include/raft/util/cache_util.cuh +++ b/cpp/include/raft/util/cache_util.cuh @@ -328,7 +328,7 @@ __global__ void assign_cache_idx(const int* keys, * @param [out] is_cached whether the element is cached size[n] * @param [in] time iteration counter (used for time stamping) */ -__global__ inline void get_cache_idx(int* keys, +__global__ static void get_cache_idx(int* keys, int n, int* cached_keys, int n_cache_sets, diff --git a/cpp/include/raft/util/integer_utils.hpp b/cpp/include/raft/util/integer_utils.hpp index 6faab5381c..d22a982a88 100644 --- a/cpp/include/raft/util/integer_utils.hpp +++ b/cpp/include/raft/util/integer_utils.hpp @@ -35,7 +35,7 @@ namespace raft { * `modulus` is positive. 
*/ template -inline S round_up_safe(S number_to_round, S modulus) +constexpr inline S round_up_safe(S number_to_round, S modulus) { auto remainder = number_to_round % modulus; if (remainder == 0) { return number_to_round; } diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 41dab0e388..807427fdc1 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -88,248 +88,248 @@ endfunction() # * distance tests ------------------------------------------------------------------------- if(BUILD_TESTS) - ConfigureTest( - NAME - CLUSTER_TEST - PATH - test/cluster/kmeans.cu - test/cluster/kmeans_balanced.cu - test/cluster/cluster_solvers.cu - test/cluster/linkage.cu - test/cluster/kmeans_find_k.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME - CORE_TEST - PATH - test/core/logger.cpp - test/core/math_device.cu - test/core/math_host.cpp - test/core/operators_device.cu - test/core/operators_host.cpp - test/core/handle.cpp - test/core/interruptible.cu - test/core/nvtx.cpp - test/core/mdarray.cu - test/core/mdspan_utils.cu - test/core/numpy_serializer.cu - test/core/memory_type.cpp - test/core/sparse_matrix.cu - test/core/sparse_matrix.cpp - test/core/span.cpp - test/core/span.cu - test/core/temporary_device_buffer.cu - test/test.cpp - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME - DISTANCE_TEST - PATH - test/distance/dist_adj.cu - test/distance/dist_adj_distance_instance.cu - test/distance/dist_canberra.cu - test/distance/dist_correlation.cu - test/distance/dist_cos.cu - test/distance/dist_hamming.cu - test/distance/dist_hellinger.cu - test/distance/dist_inner_product.cu - test/distance/dist_jensen_shannon.cu - test/distance/dist_kl_divergence.cu - test/distance/dist_l1.cu - test/distance/dist_l2_exp.cu - test/distance/dist_l2_unexp.cu - test/distance/dist_l2_sqrt_exp.cu - test/distance/dist_l_inf.cu - test/distance/dist_lp_unexp.cu - test/distance/dist_russell_rao.cu - test/distance/masked_nn.cu - 
test/distance/masked_nn_compress_to_bits.cu - test/distance/fused_l2_nn.cu - test/distance/gram.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - list( - APPEND - EXT_HEADER_TEST_SOURCES - test/ext_headers/raft_neighbors_brute_force.cu - test/ext_headers/raft_distance_distance.cu - test/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu - test/ext_headers/raft_matrix_detail_select_k.cu - test/ext_headers/raft_neighbors_ball_cover.cu - test/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu - test/ext_headers/raft_distance_fused_l2_nn.cu - test/ext_headers/raft_neighbors_ivf_pq.cu - test/ext_headers/raft_util_memory_pool.cpp - test/ext_headers/raft_neighbors_ivf_flat.cu - test/ext_headers/raft_core_logger.cpp - test/ext_headers/raft_neighbors_refine.cu - test/ext_headers/raft_neighbors_detail_ivf_flat_search.cu - test/ext_headers/raft_neighbors_detail_selection_faiss.cu - test/ext_headers/raft_linalg_detail_coalesced_reduction.cu - test/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu - test/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu - test/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu - ) - - # Test that the split headers compile in isolation with: - # - # * EXT_HEADERS_TEST_COMPILED_EXPLICIT: RAFT_COMPILED, RAFT_EXPLICIT_INSTANTIATE_ONLY defined - # * EXT_HEADERS_TEST_COMPILED_IMPLICIT: RAFT_COMPILED defined - # * EXT_HEADERS_TEST_IMPLICIT: no macros defined. 
- ConfigureTest( - NAME EXT_HEADERS_TEST_COMPILED_EXPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB - EXPLICIT_INSTANTIATE_ONLY - ) - ConfigureTest(NAME EXT_HEADERS_TEST_COMPILED_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB) - ConfigureTest(NAME EXT_HEADERS_TEST_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES}) - - ConfigureTest(NAME LABEL_TEST PATH test/label/label.cu test/label/merge_labels.cu) - - ConfigureTest( - NAME - LINALG_TEST - PATH - test/linalg/add.cu - test/linalg/axpy.cu - test/linalg/binary_op.cu - test/linalg/cholesky_r1.cu - test/linalg/coalesced_reduction.cu - test/linalg/divide.cu - test/linalg/dot.cu - test/linalg/eig.cu - test/linalg/eig_sel.cu - test/linalg/gemm_layout.cu - test/linalg/gemv.cu - test/linalg/map.cu - test/linalg/map_then_reduce.cu - test/linalg/matrix_vector.cu - test/linalg/matrix_vector_op.cu - test/linalg/mean_squared_error.cu - test/linalg/multiply.cu - test/linalg/norm.cu - test/linalg/normalize.cu - test/linalg/power.cu - test/linalg/randomized_svd.cu - test/linalg/reduce.cu - test/linalg/reduce_cols_by_key.cu - test/linalg/reduce_rows_by_key.cu - test/linalg/rsvd.cu - test/linalg/sqrt.cu - test/linalg/strided_reduction.cu - test/linalg/subtract.cu - test/linalg/svd.cu - test/linalg/ternary_op.cu - test/linalg/transpose.cu - test/linalg/unary_op.cu - ) - - ConfigureTest( - NAME - MATRIX_TEST - PATH - test/matrix/argmax.cu - test/matrix/argmin.cu - test/matrix/columnSort.cu - test/matrix/diagonal.cu - test/matrix/gather.cu - test/matrix/eye.cu - test/matrix/linewise_op.cu - test/matrix/math.cu - test/matrix/matrix.cu - test/matrix/norm.cu - test/matrix/reverse.cu - test/matrix/slice.cu - test/matrix/triangular.cu - test/sparse/spectral_matrix.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest(NAME MATRIX_SELECT_TEST PATH test/matrix/select_k.cu LIB EXPLICIT_INSTANTIATE_ONLY) - - ConfigureTest( - NAME MATRIX_SELECT_LARGE_TEST PATH test/matrix/select_large_k.cu LIB EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME - 
RANDOM_TEST - PATH - test/random/make_blobs.cu - test/random/make_regression.cu - test/random/multi_variable_gaussian.cu - test/random/permute.cu - test/random/rng.cu - test/random/rng_discrete.cu - test/random/rng_int.cu - test/random/rmat_rectangular_generator.cu - test/random/sample_without_replacement.cu - ) - - ConfigureTest( - NAME SOLVERS_TEST PATH test/cluster/cluster_solvers_deprecated.cu test/linalg/eigen_solvers.cu - test/lap/lap.cu test/sparse/mst.cu LIB EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME - SPARSE_TEST - PATH - test/sparse/add.cu - test/sparse/convert_coo.cu - test/sparse/convert_csr.cu - test/sparse/csr_row_slice.cu - test/sparse/csr_to_dense.cu - test/sparse/csr_transpose.cu - test/sparse/degree.cu - test/sparse/filter.cu - test/sparse/norm.cu - test/sparse/normalize.cu - test/sparse/reduce.cu - test/sparse/row_op.cu - test/sparse/sort.cu - test/sparse/spgemmi.cu - test/sparse/symmetrize.cu - ) - - ConfigureTest( - NAME SPARSE_DIST_TEST PATH test/sparse/dist_coo_spmv.cu test/sparse/distance.cu - test/sparse/gram.cu LIB EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME - SPARSE_NEIGHBORS_TEST - PATH - test/sparse/neighbors/connect_components.cu - test/sparse/neighbors/brute_force.cu - test/sparse/neighbors/knn_graph.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME - NEIGHBORS_TEST - PATH - test/neighbors/knn.cu - test/neighbors/fused_l2_knn.cu - test/neighbors/tiled_knn.cu - test/neighbors/haversine.cu - test/neighbors/ball_cover.cu - test/neighbors/epsilon_neighborhood.cu - test/neighbors/refine.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - ) + # ConfigureTest( + # NAME + # CLUSTER_TEST + # PATH + # test/cluster/kmeans.cu + # test/cluster/kmeans_balanced.cu + # test/cluster/cluster_solvers.cu + # test/cluster/linkage.cu + # test/cluster/kmeans_find_k.cu + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureTest( + # NAME + # CORE_TEST + # PATH + # test/core/logger.cpp + # test/core/math_device.cu + # 
test/core/math_host.cpp + # test/core/operators_device.cu + # test/core/operators_host.cpp + # test/core/handle.cpp + # test/core/interruptible.cu + # test/core/nvtx.cpp + # test/core/mdarray.cu + # test/core/mdspan_utils.cu + # test/core/numpy_serializer.cu + # test/core/memory_type.cpp + # test/core/sparse_matrix.cu + # test/core/sparse_matrix.cpp + # test/core/span.cpp + # test/core/span.cu + # test/core/temporary_device_buffer.cu + # test/test.cpp + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureTest( + # NAME + # DISTANCE_TEST + # PATH + # test/distance/dist_adj.cu + # test/distance/dist_adj_distance_instance.cu + # test/distance/dist_canberra.cu + # test/distance/dist_correlation.cu + # test/distance/dist_cos.cu + # test/distance/dist_hamming.cu + # test/distance/dist_hellinger.cu + # test/distance/dist_inner_product.cu + # test/distance/dist_jensen_shannon.cu + # test/distance/dist_kl_divergence.cu + # test/distance/dist_l1.cu + # test/distance/dist_l2_exp.cu + # test/distance/dist_l2_unexp.cu + # test/distance/dist_l2_sqrt_exp.cu + # test/distance/dist_l_inf.cu + # test/distance/dist_lp_unexp.cu + # test/distance/dist_russell_rao.cu + # test/distance/masked_nn.cu + # test/distance/masked_nn_compress_to_bits.cu + # test/distance/fused_l2_nn.cu + # test/distance/gram.cu + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) + + # list( + # APPEND + # EXT_HEADER_TEST_SOURCES + # test/ext_headers/raft_neighbors_brute_force.cu + # test/ext_headers/raft_distance_distance.cu + # test/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu + # test/ext_headers/raft_matrix_detail_select_k.cu + # test/ext_headers/raft_neighbors_ball_cover.cu + # test/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu + # test/ext_headers/raft_distance_fused_l2_nn.cu + # test/ext_headers/raft_neighbors_ivf_pq.cu + # test/ext_headers/raft_util_memory_pool.cpp + # test/ext_headers/raft_neighbors_ivf_flat.cu + # test/ext_headers/raft_core_logger.cpp + # 
test/ext_headers/raft_neighbors_refine.cu + # test/ext_headers/raft_neighbors_detail_ivf_flat_search.cu + # test/ext_headers/raft_neighbors_detail_selection_faiss.cu + # test/ext_headers/raft_linalg_detail_coalesced_reduction.cu + # test/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu + # test/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu + # test/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu + # ) + + # # Test that the split headers compile in isolation with: + # # + # # * EXT_HEADERS_TEST_COMPILED_EXPLICIT: RAFT_COMPILED, RAFT_EXPLICIT_INSTANTIATE_ONLY defined + # # * EXT_HEADERS_TEST_COMPILED_IMPLICIT: RAFT_COMPILED defined + # # * EXT_HEADERS_TEST_IMPLICIT: no macros defined. + # ConfigureTest( + # NAME EXT_HEADERS_TEST_COMPILED_EXPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) + # ConfigureTest(NAME EXT_HEADERS_TEST_COMPILED_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB) + # ConfigureTest(NAME EXT_HEADERS_TEST_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES}) + + # ConfigureTest(NAME LABEL_TEST PATH test/label/label.cu test/label/merge_labels.cu) + + # ConfigureTest( + # NAME + # LINALG_TEST + # PATH + # test/linalg/add.cu + # test/linalg/axpy.cu + # test/linalg/binary_op.cu + # test/linalg/cholesky_r1.cu + # test/linalg/coalesced_reduction.cu + # test/linalg/divide.cu + # test/linalg/dot.cu + # test/linalg/eig.cu + # test/linalg/eig_sel.cu + # test/linalg/gemm_layout.cu + # test/linalg/gemv.cu + # test/linalg/map.cu + # test/linalg/map_then_reduce.cu + # test/linalg/matrix_vector.cu + # test/linalg/matrix_vector_op.cu + # test/linalg/mean_squared_error.cu + # test/linalg/multiply.cu + # test/linalg/norm.cu + # test/linalg/normalize.cu + # test/linalg/power.cu + # test/linalg/randomized_svd.cu + # test/linalg/reduce.cu + # test/linalg/reduce_cols_by_key.cu + # test/linalg/reduce_rows_by_key.cu + # test/linalg/rsvd.cu + # test/linalg/sqrt.cu + # test/linalg/strided_reduction.cu + # 
test/linalg/subtract.cu + # test/linalg/svd.cu + # test/linalg/ternary_op.cu + # test/linalg/transpose.cu + # test/linalg/unary_op.cu + # ) + + # ConfigureTest( + # NAME + # MATRIX_TEST + # PATH + # test/matrix/argmax.cu + # test/matrix/argmin.cu + # test/matrix/columnSort.cu + # test/matrix/diagonal.cu + # test/matrix/gather.cu + # test/matrix/eye.cu + # test/matrix/linewise_op.cu + # test/matrix/math.cu + # test/matrix/matrix.cu + # test/matrix/norm.cu + # test/matrix/reverse.cu + # test/matrix/slice.cu + # test/matrix/triangular.cu + # test/sparse/spectral_matrix.cu + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureTest(NAME MATRIX_SELECT_TEST PATH test/matrix/select_k.cu LIB EXPLICIT_INSTANTIATE_ONLY) + + # ConfigureTest( + # NAME MATRIX_SELECT_LARGE_TEST PATH test/matrix/select_large_k.cu LIB EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureTest( + # NAME + # RANDOM_TEST + # PATH + # test/random/make_blobs.cu + # test/random/make_regression.cu + # test/random/multi_variable_gaussian.cu + # test/random/permute.cu + # test/random/rng.cu + # test/random/rng_discrete.cu + # test/random/rng_int.cu + # test/random/rmat_rectangular_generator.cu + # test/random/sample_without_replacement.cu + # ) + + # ConfigureTest( + # NAME SOLVERS_TEST PATH test/cluster/cluster_solvers_deprecated.cu test/linalg/eigen_solvers.cu + # test/lap/lap.cu test/sparse/mst.cu LIB EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureTest( + # NAME + # SPARSE_TEST + # PATH + # test/sparse/add.cu + # test/sparse/convert_coo.cu + # test/sparse/convert_csr.cu + # test/sparse/csr_row_slice.cu + # test/sparse/csr_to_dense.cu + # test/sparse/csr_transpose.cu + # test/sparse/degree.cu + # test/sparse/filter.cu + # test/sparse/norm.cu + # test/sparse/normalize.cu + # test/sparse/reduce.cu + # test/sparse/row_op.cu + # test/sparse/sort.cu + # test/sparse/spgemmi.cu + # test/sparse/symmetrize.cu + # ) + + # ConfigureTest( + # NAME SPARSE_DIST_TEST PATH test/sparse/dist_coo_spmv.cu 
test/sparse/distance.cu + # test/sparse/gram.cu LIB EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureTest( + # NAME + # SPARSE_NEIGHBORS_TEST + # PATH + # test/sparse/neighbors/connect_components.cu + # test/sparse/neighbors/brute_force.cu + # test/sparse/neighbors/knn_graph.cu + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureTest( + # NAME + # NEIGHBORS_TEST + # PATH + # test/neighbors/knn.cu + # test/neighbors/fused_l2_knn.cu + # test/neighbors/tiled_knn.cu + # test/neighbors/haversine.cu + # test/neighbors/ball_cover.cu + # test/neighbors/epsilon_neighborhood.cu + # test/neighbors/refine.cu + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) ConfigureTest( NAME @@ -355,76 +355,110 @@ if(BUILD_TESTS) 100 ) - ConfigureTest( - NAME - NEIGHBORS_ANN_IVF_TEST - PATH - test/neighbors/ann_ivf_flat/test_float_int64_t.cu - test/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu - test/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu - test/neighbors/ann_ivf_pq/test_float_int64_t.cu - test/neighbors/ann_ivf_pq/test_float_uint32_t.cu - test/neighbors/ann_ivf_pq/test_float_int64_t.cu - test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu - test/neighbors/ann_ivf_pq/test_uint8_t_int64_t.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - GPUS - 1 - PERCENT - 100 - ) - - ConfigureTest( - NAME NEIGHBORS_SELECTION_TEST PATH test/neighbors/selection.cu LIB EXPLICIT_INSTANTIATE_ONLY - GPUS 1 PERCENT 50 - ) - - ConfigureTest( - NAME - STATS_TEST - PATH - test/stats/accuracy.cu - test/stats/adjusted_rand_index.cu - test/stats/completeness_score.cu - test/stats/contingencyMatrix.cu - test/stats/cov.cu - test/stats/dispersion.cu - test/stats/entropy.cu - test/stats/histogram.cu - test/stats/homogeneity_score.cu - test/stats/information_criterion.cu - test/stats/kl_divergence.cu - test/stats/mean.cu - test/stats/meanvar.cu - test/stats/mean_center.cu - test/stats/minmax.cu - test/stats/mutual_info_score.cu - test/stats/r2_score.cu - test/stats/rand_index.cu - test/stats/regression_metrics.cu - 
test/stats/silhouette_score.cu - test/stats/stddev.cu - test/stats/sum.cu - test/stats/trustworthiness.cu - test/stats/weighted_mean.cu - test/stats/v_measure.cu - LIB - EXPLICIT_INSTANTIATE_ONLY - ) - - ConfigureTest( - NAME - UTILS_TEST - PATH - test/core/seive.cu - test/util/bitonic_sort.cu - test/util/cudart_utils.cpp - test/util/device_atomics.cu - test/util/integer_utils.cpp - test/util/pow2_utils.cu - test/util/reduction.cu - ) + # ConfigureTest( + # NAME + # NEIGHBORS_ANN_IVF_TEST + # PATH + # test/neighbors/ann_ivf_flat/test_float_int64_t.cu + # test/neighbors/ann_ivf_flat/test_int8_t_int64_t.cu + # test/neighbors/ann_ivf_flat/test_uint8_t_int64_t.cu + # test/neighbors/ann_ivf_pq/test_float_int64_t.cu + # test/neighbors/ann_ivf_pq/test_float_uint32_t.cu + # test/neighbors/ann_ivf_pq/test_float_int64_t.cu + # test/neighbors/ann_ivf_pq/test_int8_t_int64_t.cu + # test/neighbors/ann_ivf_pq/test_uint8_t_int64_t.cu + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # GPUS + # 1 + # PERCENT + # 100 + # ) + + # ConfigureTest( + # NAME NEIGHBORS_SELECTION_TEST PATH test/neighbors/selection.cu LIB EXPLICIT_INSTANTIATE_ONLY + # GPUS 1 PERCENT 50 + # ) + + # ConfigureTest( + # NAME + # STATS_TEST + # PATH + # test/stats/accuracy.cu + # test/stats/adjusted_rand_index.cu + # test/stats/completeness_score.cu + # test/stats/contingencyMatrix.cu + # test/stats/cov.cu + # test/stats/dispersion.cu + # test/stats/entropy.cu + # test/stats/histogram.cu + # test/stats/homogeneity_score.cu + # test/stats/information_criterion.cu + # test/stats/kl_divergence.cu + # test/stats/mean.cu + # test/stats/meanvar.cu + # test/stats/mean_center.cu + # test/stats/minmax.cu + # test/stats/mutual_info_score.cu + # test/stats/r2_score.cu + # test/stats/rand_index.cu + # test/stats/regression_metrics.cu + # test/stats/silhouette_score.cu + # test/stats/stddev.cu + # test/stats/sum.cu + # test/stats/trustworthiness.cu + # test/stats/weighted_mean.cu + # test/stats/v_measure.cu + # LIB + # 
EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureTest( + # NAME + # STATS_TEST + # PATH + # test/stats/accuracy.cu + # test/stats/adjusted_rand_index.cu + # test/stats/completeness_score.cu + # test/stats/contingencyMatrix.cu + # test/stats/cov.cu + # test/stats/dispersion.cu + # test/stats/entropy.cu + # test/stats/histogram.cu + # test/stats/homogeneity_score.cu + # test/stats/information_criterion.cu + # test/stats/kl_divergence.cu + # test/stats/mean.cu + # test/stats/meanvar.cu + # test/stats/mean_center.cu + # test/stats/minmax.cu + # test/stats/mutual_info_score.cu + # test/stats/r2_score.cu + # test/stats/rand_index.cu + # test/stats/regression_metrics.cu + # test/stats/silhouette_score.cu + # test/stats/stddev.cu + # test/stats/sum.cu + # test/stats/trustworthiness.cu + # test/stats/weighted_mean.cu + # test/stats/v_measure.cu + # OPTIONAL + # LIB + # EXPLICIT_INSTANTIATE_ONLY + # ) + + # ConfigureTest( + # NAME + # UTILS_TEST + # PATH + # test/core/seive.cu + # test/util/bitonic_sort.cu + # test/util/cudart_utils.cpp + # test/util/device_atomics.cu + # test/util/integer_utils.cpp + # test/util/pow2_utils.cu + # test/util/reduction.cu + # ) endif() # ################################################################################################## From c3ce61eaa1bfdc702931801fa28182f0df83d603 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Mon, 31 Jul 2023 09:28:09 +0200 Subject: [PATCH 03/57] Orig CAGRA benchmark works --- cpp/bench/ann/conf/deep-100M.json | 776 +++++++++++--------- cpp/bench/ann/src/common/benchmark.hpp | 2 +- cpp/bench/ann/src/raft/orig_cagra_wrapper.h | 27 +- cpp/bench/ann/src/raft/raft_benchmark.cu | 47 +- cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 19 +- cpp/include/raft/neighbors/cagra_types.hpp | 4 +- 6 files changed, 505 insertions(+), 370 deletions(-) diff --git a/cpp/bench/ann/conf/deep-100M.json b/cpp/bench/ann/conf/deep-100M.json index 97d670b614..93fa3e52eb 100644 --- a/cpp/bench/ann/conf/deep-100M.json +++ 
b/cpp/bench/ann/conf/deep-100M.json @@ -1,25 +1,23 @@ { - "dataset" : { - "name" : "deep-100M", - "base_file" : "data/deep-1B/base.1B.fbin", - "subset_size" : 100000000, - "query_file" : "data/deep-1B/query.public.10K.fbin", - "distance" : "euclidean" + "dataset": { + "name": "deep-100M", + "base_file": "/data/deep-1B/base.1B.fbin", + "subset_size": 100000000, + "query_file": "/data/deep-1B/query.public.10K.fbin", + "distance": "euclidean" }, - - "search_basic_param" : { + "search_basic_param": { "batch_size" : 10000, - "k" : 10, - "run_count" : 2 + "k": 10, + "run_count": 2 }, - - "index" : [ + "index": [ { - "name" : "hnswlib.M12", - "algo" : "hnswlib", + "name": "hnswlib.M12", + "algo": "hnswlib", "build_param": {"M":12, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-100M/hnswlib/M12", - "search_params" : [ + "file": "index/deep-100M/hnswlib/M12", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -31,14 +29,14 @@ {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} ], - "search_result_file" : "result/deep-100M/hnswlib/M12" + "search_result_file": "result/deep-100M/hnswlib/M12" }, { - "name" : "hnswlib.M16", - "algo" : "hnswlib", + "name": "hnswlib.M16", + "algo": "hnswlib", "build_param": {"M":16, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-100M/hnswlib/M16", - "search_params" : [ + "file": "index/deep-100M/hnswlib/M16", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -50,14 +48,14 @@ {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} ], - "search_result_file" : "result/deep-100M/hnswlib/M16" + "search_result_file": "result/deep-100M/hnswlib/M16" }, { - "name" : "hnswlib.M24", - "algo" : "hnswlib", + "name": "hnswlib.M24", + "algo": "hnswlib", "build_param": {"M":24, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-100M/hnswlib/M24", - "search_params" : [ + "file": "index/deep-100M/hnswlib/M24", + 
"search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -69,14 +67,14 @@ {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} ], - "search_result_file" : "result/deep-100M/hnswlib/M24" + "search_result_file": "result/deep-100M/hnswlib/M24" }, { - "name" : "hnswlib.M36", - "algo" : "hnswlib", + "name": "hnswlib.M36", + "algo": "hnswlib", "build_param": {"M":36, "efConstruction":500, "numThreads":32}, - "file" : "index/deep-100M/hnswlib/M36", - "search_params" : [ + "file": "index/deep-100M/hnswlib/M36", + "search_params": [ {"ef":10, "numThreads":1}, {"ef":20, "numThreads":1}, {"ef":40, "numThreads":1}, @@ -88,14 +86,14 @@ {"ef":600, "numThreads":1}, {"ef":800, "numThreads":1} ], - "search_result_file" : "result/deep-100M/hnswlib/M36" + "search_result_file": "result/deep-100M/hnswlib/M36" }, { - "name" : "faiss_ivf_flat.nlist50K", - "algo" : "faiss_gpu_ivf_flat", + "name": "faiss_ivf_flat.nlist50K", + "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":50000}, - "file" : "index/deep-100M/faiss_ivf_flat/nlist50K", - "search_params" : [ + "file": "index/deep-100M/faiss_ivf_flat/nlist50K", + "search_params": [ {"nprobe":20}, {"nprobe":30}, {"nprobe":40}, @@ -105,14 +103,14 @@ {"nprobe":500}, {"nprobe":1000} ], - "search_result_file" : "result/deep-100M/faiss_ivf_flat/nlist50K" + "search_result_file": "result/deep-100M/faiss_ivf_flat/nlist50K" }, { - "name" : "faiss_ivf_flat.nlist100K", - "algo" : "faiss_gpu_ivf_flat", + "name": "faiss_ivf_flat.nlist100K", + "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":100000}, - "file" : "index/deep-100M/faiss_ivf_flat/nlist100K", - "search_params" : [ + "file": "index/deep-100M/faiss_ivf_flat/nlist100K", + "search_params": [ {"nprobe":20}, {"nprobe":30}, {"nprobe":40}, @@ -122,14 +120,14 @@ {"nprobe":500}, {"nprobe":1000} ], - "search_result_file" : "result/deep-100M/faiss_ivf_flat/nlist100K" + "search_result_file": "result/deep-100M/faiss_ivf_flat/nlist100K" }, { - "name" : 
"faiss_ivf_flat.nlist200K", - "algo" : "faiss_gpu_ivf_flat", + "name": "faiss_ivf_flat.nlist200K", + "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":200000}, - "file" : "index/deep-100M/faiss_ivf_flat/nlist200K", - "search_params" : [ + "file": "index/deep-100M/faiss_ivf_flat/nlist200K", + "search_params": [ {"nprobe":20}, {"nprobe":30}, {"nprobe":40}, @@ -139,16 +137,14 @@ {"nprobe":500}, {"nprobe":1000} ], - "search_result_file" : "result/deep-100M/faiss_ivf_flat/nlist200K" + "search_result_file": "result/deep-100M/faiss_ivf_flat/nlist200K" }, - - { - "name" : "faiss_ivf_pq.M48-nlist16K", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M48-nlist16K", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":16384, "M":48}, - "file" : "index/deep-100M/faiss_ivf_pq/M48-nlist16K", - "search_params" : [ + "file": "index/deep-100M/faiss_ivf_pq/M48-nlist16K", + "search_params": [ {"nprobe":10}, {"nprobe":20}, {"nprobe":30}, @@ -158,14 +154,14 @@ {"nprobe":200}, {"nprobe":500} ], - "search_result_file" : "result/deep-100M/faiss_ivf_pq/M48-nlist16K" + "search_result_file": "result/deep-100M/faiss_ivf_pq/M48-nlist16K" }, { - "name" : "faiss_ivf_pq.M48-nlist50K", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M48-nlist50K", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":50000, "M":48}, - "file" : "index/deep-100M/faiss_ivf_pq/M48-nlist50K", - "search_params" : [ + "file": "index/deep-100M/faiss_ivf_pq/M48-nlist50K", + "search_params": [ {"nprobe":20}, {"nprobe":30}, {"nprobe":40}, @@ -175,14 +171,14 @@ {"nprobe":500}, {"nprobe":1000} ], - "search_result_file" : "result/deep-100M/faiss_ivf_pq/M48-nlist50K" + "search_result_file": "result/deep-100M/faiss_ivf_pq/M48-nlist50K" }, { - "name" : "faiss_ivf_pq.M48-nlist100K", - "algo" : "faiss_gpu_ivf_pq", + "name": "faiss_ivf_pq.M48-nlist100K", + "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":100000, "M":48}, - "file" : "index/deep-100M/faiss_ivf_pq/M48-nlist100K", - "search_params" : [ + "file": 
"index/deep-100M/faiss_ivf_pq/M48-nlist100K", + "search_params": [ {"nprobe":20}, {"nprobe":30}, {"nprobe":40}, @@ -192,20 +188,18 @@ {"nprobe":500}, {"nprobe":1000} ], - "search_result_file" : "result/deep-100M/faiss_ivf_pq/M48-nlist100K" + "search_result_file": "result/deep-100M/faiss_ivf_pq/M48-nlist100K" }, - - { - "name" : "ivf_flat.nlist100K", - "algo" : "ivf_flat", + "name": "ivf_flat.nlist100K", + "algo": "ivf_flat", "build_param": { - "nlist" : 100000, - "niter" : 25, - "ratio" : 5 + "nlist": 100000, + "niter": 25, + "ratio": 5 }, - "file" : "index/deep-100M/ivf_flat/nlist100K", - "search_params" : [ + "file": "index/deep-100M/ivf_flat/nlist100K", + "search_params": [ {"max_batch":10000, "max_k":10, "nprobe":20}, {"max_batch":10000, "max_k":10, "nprobe":30}, {"max_batch":10000, "max_k":10, "nprobe":40}, @@ -215,331 +209,427 @@ {"max_batch":10000, "max_k":10, "nprobe":500}, {"max_batch":10000, "max_k":10, "nprobe":1000} ], - "search_result_file" : "result/deep-100M/ivf_flat/nlist100K" + "search_result_file": "result/deep-100M/ivf_flat/nlist100K" }, - { - "name" : "cagra.dim32", - "algo" : "raft_cagra", + "name": "cagra.dim32", + "algo": "raft_cagra", "build_param": { "index_dim": 32, "intermediate_graph_degree": 48 }, - "file": "index/deep-100M/cagra/dim32", - "search_params": [ - { - "itopk": 32, - "search_width": 1, - "max_iterations": 0, - "algo": "single_cta" - }, - { - "itopk": 32, - "search_width": 1, - "max_iterations": 32, - "algo": "single_cta" - }, - { - "itopk": 64, - "search_width": 4, - "max_iterations": 16, - "algo": "single_cta" - }, - { - "itopk": 64, - "search_width": 1, - "max_iterations": 64, - "algo": "single_cta" - }, - { - "itopk": 96, - "search_width": 2, - "max_iterations": 48, - "algo": "single_cta" - }, - { - "itopk": 128, - "search_width": 8, - "max_iterations": 16, - "algo": "single_cta" - }, - { - "itopk": 128, - "search_width": 2, - "max_iterations": 64, - "algo": "single_cta" - }, - { - "itopk": 192, - "search_width": 8, - 
"max_iterations": 24, - "algo": "single_cta" - }, - { - "itopk": 192, - "search_width": 2, - "max_iterations": 96, - "algo": "single_cta" - }, - { - "itopk": 256, - "search_width": 8, - "max_iterations": 32, - "algo": "single_cta" - }, - { - "itopk": 384, - "search_width": 8, - "max_iterations": 48, - "algo": "single_cta" - }, - { - "itopk": 512, - "search_width": 8, - "max_iterations": 64, - "algo": "single_cta" - } - ], - "search_result_file": "result/deep-100M/cagra/dim32" - }, - { - "name": "cagra.dim32.multi_cta", - "algo": "raft_cagra", - "build_param": { - "index_dim": 32, - "intermediate_graph_degree": 48 - }, - "file": "index/deep-100M/cagra/dim32", - "search_params": [ - { - "itopk": 32, - "search_width": 1, - "max_iterations": 0, - "algo": "multi_cta" - }, - { - "itopk": 32, - "search_width": 1, - "max_iterations": 32, - "algo": "multi_cta" - }, - { - "itopk": 64, - "search_width": 4, - "max_iterations": 16, - "algo": "multi_cta" - }, - { - "itopk": 64, - "search_width": 1, - "max_iterations": 64, - "algo": "multi_cta" - }, - { - "itopk": 96, - "search_width": 2, - "max_iterations": 48, - "algo": "multi_cta" - }, - { - "itopk": 128, - "search_width": 8, - "max_iterations": 16, - "algo": "multi_cta" - }, - { - "itopk": 128, - "search_width": 2, - "max_iterations": 64, - "algo": "multi_cta" - }, - { - "itopk": 192, - "search_width": 8, - "max_iterations": 24, - "algo": "multi_cta" - }, - { - "itopk": 192, - "search_width": 2, - "max_iterations": 96, - "algo": "multi_cta" - }, - { - "itopk": 256, - "search_width": 8, - "max_iterations": 32, - "algo": "multi_cta" - }, - { - "itopk": 384, - "search_width": 8, - "max_iterations": 48, - "algo": "multi_cta" - }, - { - "itopk": 512, - "search_width": 8, - "max_iterations": 64, - "algo": "multi_cta" - } - ], - "search_result_file": "result/deep-100M/cagra/dim32_multi_cta" - }, - { - "name": "cagra.dim32.multi_kernel", - "algo": "raft_cagra", - "build_param": { - "index_dim": 32, - "intermediate_graph_degree": 48 - 
}, - "file": "index/deep-100M/cagra/dim32", - "search_params": [ - { - "itopk": 32, - "search_width": 1, - "max_iterations": 0, - "algo": "multi_kernel" - }, - { - "itopk": 32, - "search_width": 1, - "max_iterations": 32, - "algo": "multi_kernel" - }, - { - "itopk": 64, - "search_width": 4, - "max_iterations": 16, - "algo": "multi_kernel" - }, - { - "itopk": 64, - "search_width": 1, - "max_iterations": 64, - "algo": "multi_kernel" - }, - { - "itopk": 96, - "search_width": 2, - "max_iterations": 48, - "algo": "multi_kernel" - }, - { - "itopk": 128, - "search_width": 8, - "max_iterations": 16, - "algo": "multi_kernel" - }, - { - "itopk": 128, - "search_width": 2, - "max_iterations": 64, - "algo": "multi_kernel" - }, - { - "itopk": 192, - "search_width": 8, - "max_iterations": 24, - "algo": "multi_kernel" - }, - { - "itopk": 192, - "search_width": 2, - "max_iterations": 96, - "algo": "multi_kernel" - }, - { - "itopk": 256, - "search_width": 8, - "max_iterations": 32, - "algo": "multi_kernel" - }, - { - "itopk": 384, - "search_width": 8, - "max_iterations": 48, - "algo": "multi_kernel" - }, - { - "itopk": 512, - "search_width": 8, - "max_iterations": 64, - "algo": "multi_kernel" - } - ], - "search_result_file": "result/deep-100M/cagra/dim32_multi_kernel" - }, - { - "name": "cagra.dim64", - "algo": "raft_cagra", - "build_param": { - "index_dim": 64 - }, - "file": "index/deep-100M/cagra/dim64", - "search_params" : [ + "file": "/workspace1/index/cagra/deep100m_n48_fp32.opt32", + "search_params": [ { "itopk": 32, "search_width": 1, - "max_iterations": 0 + "max_iterations": 0, + "algo": "single_cta" }, { "itopk": 32, "search_width": 1, - "max_iterations": 32 + "max_iterations": 32, + "algo": "single_cta" }, { "itopk": 64, "search_width": 4, - "max_iterations": 16 + "max_iterations": 16, + "algo": "single_cta" }, { "itopk": 64, "search_width": 1, - "max_iterations": 64 + "max_iterations": 64, + "algo": "single_cta" }, { "itopk": 96, "search_width": 2, - "max_iterations": 48 
+ "max_iterations": 48, + "algo": "single_cta" }, { "itopk": 128, "search_width": 8, - "max_iterations": 16 + "max_iterations": 16, + "algo": "single_cta" }, { "itopk": 128, "search_width": 2, - "max_iterations": 64 + "max_iterations": 64, + "algo": "single_cta" }, { "itopk": 192, "search_width": 8, - "max_iterations": 24 + "max_iterations": 24, + "algo": "single_cta" }, { "itopk": 192, "search_width": 2, - "max_iterations": 96 + "max_iterations": 96, + "algo": "single_cta" }, { "itopk": 256, "search_width": 8, - "max_iterations": 32 + "max_iterations": 32, + "algo": "single_cta" }, { "itopk": 384, "search_width": 8, - "max_iterations": 48 + "max_iterations": 48, + "algo": "single_cta" }, { "itopk": 512, "search_width": 8, - "max_iterations": 64 + "max_iterations": 64, + "algo": "single_cta" } ], - "search_result_file" : "result/deep-100M/cagra/dim32" + "search_result_file": "result/deep-100M/cagra/dim32_single_cta" + }, + { + "name": "cagra.dim32.multi_cta", + "algo": "raft_cagra", + "build_param": { + "index_dim": 32, + "intermediate_graph_degree": 48 + }, + "file": "/workspace1/index/cagra/deep100m_n48_fp32.opt32", + "search_params": [ + { + "itopk": 32, + "search_width": 1, + "max_iterations": 0, + "algo": "multi_cta" + }, + { + "itopk": 32, + "search_width": 1, + "max_iterations": 32, + "algo": "multi_cta" + }, + { + "itopk": 64, + "search_width": 4, + "max_iterations": 16, + "algo": "multi_cta" + }, + { + "itopk": 64, + "search_width": 1, + "max_iterations": 64, + "algo": "multi_cta" + }, + { + "itopk": 96, + "search_width": 2, + "max_iterations": 48, + "algo": "multi_cta" + }, + { + "itopk": 128, + "search_width": 8, + "max_iterations": 16, + "algo": "multi_cta" + }, + { + "itopk": 128, + "search_width": 2, + "max_iterations": 64, + "algo": "multi_cta" + }, + { + "itopk": 192, + "search_width": 8, + "max_iterations": 24, + "algo": "multi_cta" + }, + { + "itopk": 192, + "search_width": 2, + "max_iterations": 96, + "algo": "multi_cta" + }, + { + "itopk": 256, 
+ "search_width": 8, + "max_iterations": 32, + "algo": "multi_cta" + }, + { + "itopk": 384, + "search_width": 8, + "max_iterations": 48, + "algo": "multi_cta" + }, + { + "itopk": 512, + "search_width": 8, + "max_iterations": 64, + "algo": "multi_cta" + } + ], + "search_result_file": "result/deep-100M/cagra/dim32_multi_cta" + }, + { + "name": "raftcagra.dim32.multi_kernel", + "algo": "raft_cagra", + "build_param": { + "index_dim": 32, + "intermediate_graph_degree": 48 + }, + "file": "/workspace1/index/cagra/deep100m_n48_fp32.opt32", + "search_params": [ + { + "itopk": 32, + "search_width": 1, + "max_iterations": 0, + "algo": "multi_kernel" + }, + { + "itopk": 32, + "search_width": 1, + "max_iterations": 32, + "algo": "multi_kernel" + }, + { + "itopk": 64, + "search_width": 4, + "max_iterations": 16, + "algo": "multi_kernel" + }, + { + "itopk": 64, + "search_width": 1, + "max_iterations": 64, + "algo": "multi_kernel" + }, + { + "itopk": 96, + "search_width": 2, + "max_iterations": 48, + "algo": "multi_kernel" + }, + { + "itopk": 128, + "search_width": 8, + "max_iterations": 16, + "algo": "multi_kernel" + }, + { + "itopk": 128, + "search_width": 2, + "max_iterations": 64, + "algo": "multi_kernel" + }, + { + "itopk": 192, + "search_width": 8, + "max_iterations": 24, + "algo": "multi_kernel" + }, + { + "itopk": 192, + "search_width": 2, + "max_iterations": 96, + "algo": "multi_kernel" + }, + { + "itopk": 256, + "search_width": 8, + "max_iterations": 32, + "algo": "multi_kernel" + }, + { + "itopk": 384, + "search_width": 8, + "max_iterations": 48, + "algo": "multi_kernel" + }, + { + "itopk": 512, + "search_width": 8, + "max_iterations": 64, + "algo": "multi_kernel" + } + ], + "search_result_file": "result/deep-100M/cagra/dim32_multi_kernel" + }, + { + "name": "cagra_orig.dim32", + "algo": "cagra", + "build_param": { + "index_dim": 32, + "intermediate_graph_degree": 48 + }, + "file": "/workspace1/index/cagra/deep100m_n48_fp32.opt32", + "search_params": [ + { + "itopk": 
32, + "search_width": 1, + "max_iterations": 0, + "algo": "single_cta" + }, + { + "itopk": 32, + "search_width": 1, + "max_iterations": 32, + "algo": "single_cta" + }, + { + "itopk": 64, + "search_width": 4, + "max_iterations": 16, + "algo": "single_cta" + }, + { + "itopk": 64, + "search_width": 1, + "max_iterations": 64, + "algo": "single_cta" + }, + { + "itopk": 96, + "search_width": 2, + "max_iterations": 48, + "algo": "single_cta" + }, + { + "itopk": 128, + "search_width": 8, + "max_iterations": 16, + "algo": "single_cta" + }, + { + "itopk": 128, + "search_width": 2, + "max_iterations": 64, + "algo": "single_cta" + }, + { + "itopk": 192, + "search_width": 8, + "max_iterations": 24, + "algo": "single_cta" + }, + { + "itopk": 192, + "search_width": 2, + "max_iterations": 96, + "algo": "single_cta" + }, + { + "itopk": 256, + "search_width": 8, + "max_iterations": 32, + "algo": "single_cta" + }, + { + "itopk": 384, + "search_width": 8, + "max_iterations": 48, + "algo": "single_cta" + }, + { + "itopk": 512, + "search_width": 8, + "max_iterations": 64, + "algo": "single_cta" + } + ], + "search_result_file": "result/deep-100M/cagra_orig/dim32_single_cta" + }, + { + "name": "cagra_orig.dim32.multi_cta", + "algo": "cagra", + "build_param": { + "index_dim": 32, + "intermediate_graph_degree": 48 + }, + "file": "/workspace1/index/cagra/deep100m_n48_fp32.opt32", + "search_params": [ + { + "itopk": 32, + "search_width": 1, + "max_iterations": 0, + "algo": "multi_cta" + }, + { + "itopk": 32, + "search_width": 1, + "max_iterations": 32, + "algo": "multi_cta" + }, + { + "itopk": 64, + "search_width": 4, + "max_iterations": 16, + "algo": "multi_cta" + }, + { + "itopk": 64, + "search_width": 1, + "max_iterations": 64, + "algo": "multi_cta" + }, + { + "itopk": 96, + "search_width": 2, + "max_iterations": 48, + "algo": "multi_cta" + }, + { + "itopk": 128, + "search_width": 8, + "max_iterations": 16, + "algo": "multi_cta" + }, + { + "itopk": 128, + "search_width": 2, + 
"max_iterations": 64, + "algo": "multi_cta" + }, + { + "itopk": 192, + "search_width": 8, + "max_iterations": 24, + "algo": "multi_cta" + }, + { + "itopk": 192, + "search_width": 2, + "max_iterations": 96, + "algo": "multi_cta" + }, + { + "itopk": 256, + "search_width": 8, + "max_iterations": 32, + "algo": "multi_cta" + }, + { + "itopk": 384, + "search_width": 8, + "max_iterations": 48, + "algo": "multi_cta" + }, + { + "itopk": 512, + "search_width": 8, + "max_iterations": 64, + "algo": "multi_cta" + } + ], + "search_result_file": "result/deep-100M/cagra_orig/dim32_multi_cta" } ] } diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index 8870ec95f7..6bb943cec2 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -357,7 +357,7 @@ inline void search(const Dataset* dataset, const std::vector(index.algo, index.search_params[i]); - std::cout << "Cagra created search_param" << std::endl; + // std::cout << "Cagra created search_param" << std::endl; algo->set_search_param(*p_param); log_info("search with param: %s", index.search_params[i].dump().c_str()); diff --git a/cpp/bench/ann/src/raft/orig_cagra_wrapper.h b/cpp/bench/ann/src/raft/orig_cagra_wrapper.h index 35d7155b2f..7a8eef461f 100644 --- a/cpp/bench/ann/src/raft/orig_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/orig_cagra_wrapper.h @@ -99,7 +99,7 @@ class Cagra : public ANN { void set_search_dataset(const T* dataset, size_t nrow) override { - std::cout << "Cagra set_search_dataset" << std::endl; + // std::cout << "Cagra set_search_dataset" << std::endl; dataset_ = dataset; if (nrow_ == 0) { nrow_ = nrow; @@ -115,7 +115,7 @@ class Cagra : public ANN { using ANN::dim_; SearchParam search_param_; - void* plan_; + void* plan_{nullptr}; const T* dataset_{nullptr}; size_t nrow_{0}; @@ -188,7 +188,7 @@ void Cagra::build(const T*, size_t, cudaStream_t) template void Cagra::set_search_param(const AnnSearchParam& param) { - std::cout << 
"Cagra set_search_param" << std::endl; + // std::cout << "Cagra set_search_param" << std::endl; if (!dataset_ || nrow_ == 0) { throw std::runtime_error("Cagra: dataset is not loaded"); } if (!graph_ || degree_ == 0) { throw std::runtime_error("Cagra: index is not loaded"); } @@ -205,7 +205,7 @@ void Cagra::set_search_param(const AnnSearchParam& param) // new_search_param.max_iterations != search_param_.max_iterations) { if (plan_) { - std::cout << "Cagra destroying plan" << std::endl; + // std::cout << "Cagra destroying plan" << std::endl; destroy_plan(plan_); } @@ -217,14 +217,14 @@ void Cagra::set_search_param(const AnnSearchParam& param) cudaMalloc(&tmp_neighbors_, sizeof(size_t) * new_search_param.batch_size * new_search_param.k)); // } search_param_ = new_search_param; - std::cout << "Cagra creating new plan" << std::endl; + // std::cout << "Cagra creating new plan" << std::endl; create_plan(&plan_, get_cagra_dtype(), 0, // team_size search_param_.search_mode, search_param_.k, search_param_.p.itopk_size, - search_param_.p.num_parents, + search_param_.p.search_width, search_param_.p.min_iterations, search_param_.p.max_iterations, search_param_.batch_size, @@ -264,7 +264,7 @@ void Cagra::search(const T* queries, // uint32_t neighbors_ptr = std::is_same::value ? 
tmp_neighbors_ - std::cout << "Cagra calling search" << std::endl; + // std::cout << "Cagra calling search" << std::endl; ::search(plan_, tmp_neighbors_, distances, @@ -277,12 +277,9 @@ void Cagra::search(const T* queries, nullptr, stream); - std::cout << "Cagra calling unaryop" << std::endl; - raft::linalg::unaryOp(neighbors, - tmp_neighbors_, - batch_size * k, - raft::cast_op(), - resource::get_cuda_stream(handle_)); + // std::cout << "Cagra calling unaryop" << std::endl; + raft::linalg::unaryOp(neighbors, tmp_neighbors_, batch_size * k, raft::cast_op(), stream); + handle_.sync_stream(stream); } template @@ -310,7 +307,7 @@ void Cagra::save(const std::string& file) const template void Cagra::load(const std::string& file) { - std::cout << "Cagra load graph" << std::endl; + // std::cout << "Cagra load graph" << std::endl; FILE* fp = fopen(file.c_str(), "r"); if (!fp) { throw std::runtime_error("fail to open " + file); } @@ -333,7 +330,7 @@ void Cagra::load(const std::string& file) if (fread(h_graph, sizeof(*h_graph), total, fp) != total) { throw std::runtime_error("fread() " + file + " failed"); } - std::cout << "Cagra alloc device graph" << std::endl; + // std::cout << "Cagra alloc device graph" << std::endl; RAFT_CUDA_TRY(cudaMalloc(&graph_, sizeof(*graph_) * total)); RAFT_CUDA_TRY(cudaMemcpy(graph_, h_graph, sizeof(*graph_) * total, cudaMemcpyHostToDevice)); delete[] h_graph; diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index 6c82f9c48d..4421bdeb04 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -163,6 +163,46 @@ void parse_search_param(const nlohmann::json& conf, } } } +template +void parse_build_param(const nlohmann::json& conf, + typename raft::bench::ann::Cagra::BuildParam& param) +{ +} +template +void parse_search_param(const nlohmann::json& conf, + typename raft::bench::ann::Cagra::SearchParam& param) +{ + if (conf.contains("itopk")) { 
param.p.itopk_size = conf.at("itopk"); } + if (conf.contains("search_width")) { param.p.search_width = conf.at("search_width"); } + if (conf.contains("max_iterations")) { param.p.max_iterations = conf.at("max_iterations"); } + if (conf.contains("algo")) { + if (conf.at("algo") == "single_cta") { + param.p.algo = raft::neighbors::experimental::cagra::search_algo::SINGLE_CTA; + param.search_mode = "single-cta"; + } else if (conf.at("algo") == "multi_cta") { + param.p.algo = raft::neighbors::experimental::cagra::search_algo::MULTI_CTA; + param.search_mode = "multi-cta"; + } else if (conf.at("algo") == "multi_kernel") { + param.p.algo = raft::neighbors::experimental::cagra::search_algo::MULTI_KERNEL; + param.search_mode = "multi-kernel"; + } else if (conf.at("algo") == "auto") { + param.p.algo = raft::neighbors::experimental::cagra::search_algo::AUTO; + } else { + std::string tmp = conf.at("algo"); + THROW("Invalid value for algo: %s", tmp.c_str()); + } + } + if (conf.contains("k")) { + param.k = conf.at("k"); + } else { + param.k = 10; + } + if (conf.contains("batch_size")) { + param.batch_size = conf.at("batch_size"); + } else { + param.batch_size = 10000; + }; +} #endif template @@ -210,7 +250,7 @@ std::unique_ptr> create_algo(const std::string& algo, } if (algo == "cagra") { typename raft::bench::ann::Cagra::BuildParam param; - parse_build_param(conf, param); + parse_build_param(conf, param); ann = std::make_unique>(metric, dim, param); } #endif @@ -251,6 +291,11 @@ std::unique_ptr::AnnSearchParam> create_search parse_search_param(conf, *param); return param; } + if (algo == "cagra") { + auto param = std::make_unique::SearchParam>(); + parse_search_param(conf, *param); + return param; + } #endif // else throw std::runtime_error("invalid algo: '" + algo + "'"); diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index 1a191df756..0246260340 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ 
b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -61,6 +61,7 @@ class RaftCagra : public ANN { void set_search_param(const AnnSearchParam& param) override; + void set_search_dataset(const T* dataset, size_t nrow) override; // TODO: if the number of results is less than k, the remaining elements of 'neighbors' // will be filled with (size_t)-1 void search(const T* queries, @@ -147,12 +148,12 @@ void RaftCagra::save(const std::string& file) const // of.close(); size_t degree = index_->graph_degree(); - std::cout << "Saving knn graph" << std::endl; - for (int i = 0; i < std::min(index_->size(), 10); i++) { - print_vector("k", index_->graph().data_handle() + i * degree, degree, std::cout); - } + // std::cout << "Saving knn graph" << std::endl; + // for (int i = 0; i < std::min(index_->size(), 10); i++) { + // print_vector("k", index_->graph().data_handle() + i * degree, degree, std::cout); + // } - // Orig CAGRA type of serialization + // 3. Orig CAGRA type of serialization std::ofstream of(file, std::ios::out | std::ios::binary); std::size_t size = index_->size(); // std::size_t degree = index_->graph_degree(); @@ -210,10 +211,10 @@ void RaftCagra::load(const std::string& file) graph_.data_handle(), graph_h.data_handle(), graph_.size(), resource::get_cuda_stream(handle_)); resource::sync_stream(handle_); - std::cout << "Loading knn graph" << std::endl; - for (int i = 0; i < std::min(graph_.extent(0), 10); i++) { - print_vector("k", graph_.data_handle() + i * degree, degree, std::cout); - } + // std::cout << "Loading knn graph" << std::endl; + // for (int i = 0; i < std::min(graph_.extent(0), 10); i++) { + // print_vector("k", graph_.data_handle() + i * degree, degree, std::cout); + // } } template diff --git a/cpp/include/raft/neighbors/cagra_types.hpp b/cpp/include/raft/neighbors/cagra_types.hpp index 907f772ad1..2583afdaa9 100644 --- a/cpp/include/raft/neighbors/cagra_types.hpp +++ b/cpp/include/raft/neighbors/cagra_types.hpp @@ -26,6 +26,7 @@ #include #include 
#include +#include #include #include @@ -112,6 +113,7 @@ static_assert(std::is_aggregate_v); */ template struct index : ann::index { + using AlignDim = raft::Pow2<16 / sizeof(T)>; static_assert(!raft::is_narrowing_v, "IdxT must be able to represent all values of uint32_t"); @@ -250,7 +252,7 @@ struct index : ann::index { void update_dataset(raft::resources const& res, raft::device_matrix_view dataset) { - if (dataset.extent(1) * sizeof(T) % 16 != 0) { + if (dataset.extent(1) % AlignDim::Value != 0) { RAFT_LOG_DEBUG("Creating a padded copy of CAGRA dataset in device memory"); copy_padded(res, dataset); } else { From 49ca646c2f04249046995fafc2e5b749d0113666 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Mon, 31 Jul 2023 20:33:41 +0200 Subject: [PATCH 04/57] cagra pin dataset --- cpp/bench/ann/CMakeLists.txt | 8 +- cpp/bench/ann/conf/deep-100M.json | 497 ++++++++++++++---- cpp/bench/ann/src/common/benchmark.hpp | 1 - cpp/bench/ann/src/raft/raft_benchmark.cu | 108 ++-- cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 1 - cpp/include/raft/neighbors/cagra_types.hpp | 66 ++- .../raft/neighbors/cuda_pinned_resource.hpp | 120 +++++ .../detail/cagra/cagra_serialize.cuh | 2 +- 8 files changed, 628 insertions(+), 175 deletions(-) create mode 100644 cpp/include/raft/neighbors/cuda_pinned_resource.hpp diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index c23f39dce4..d1ab8987c7 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -172,14 +172,14 @@ if(RAFT_ANN_BENCH_USE_RAFT_CAGRA) PATH bench/ann/src/raft/raft_benchmark.cu $<$:bench/ann/src/raft/raft_cagra.cu> - $<$:bench/ann/src/raft/orig_cagra.cu> + # $<$:bench/ann/src/raft/orig_cagra.cu> LINKS raft::compiled ) - target_compile_options(RAFT_CAGRA_ANN_BENCH PUBLIC -I/workspace/rapids/knn/cagra/include) - target_link_options(RAFT_CAGRA_ANN_BENCH PUBLIC -L/workspace/rapids/knn/cagra/lib) + # target_compile_options(RAFT_CAGRA_ANN_BENCH PUBLIC 
-I/workspace/rapids/knn/cagra/include) + # target_link_options(RAFT_CAGRA_ANN_BENCH PUBLIC -L/workspace/rapids/knn/cagra/lib) - target_link_libraries(RAFT_CAGRA_ANN_BENCH PUBLIC cagra) + # target_link_libraries(RAFT_CAGRA_ANN_BENCH PUBLIC cagra) endif() if(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT) diff --git a/cpp/bench/ann/conf/deep-100M.json b/cpp/bench/ann/conf/deep-100M.json index 93fa3e52eb..3338848c11 100644 --- a/cpp/bench/ann/conf/deep-100M.json +++ b/cpp/bench/ann/conf/deep-100M.json @@ -7,7 +7,7 @@ "distance": "euclidean" }, "search_basic_param": { - "batch_size" : 10000, + "batch_size": 10000, "k": 10, "run_count": 2 }, @@ -15,178 +15,425 @@ { "name": "hnswlib.M12", "algo": "hnswlib", - "build_param": {"M":12, "efConstruction":500, "numThreads":32}, + "build_param": { + "M": 12, + "efConstruction": 500, + "numThreads": 32 + }, "file": "index/deep-100M/hnswlib/M12", "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} + { + "ef": 10, + "numThreads": 1 + }, + { + "ef": 20, + "numThreads": 1 + }, + { + "ef": 40, + "numThreads": 1 + }, + { + "ef": 60, + "numThreads": 1 + }, + { + "ef": 80, + "numThreads": 1 + }, + { + "ef": 120, + "numThreads": 1 + }, + { + "ef": 200, + "numThreads": 1 + }, + { + "ef": 400, + "numThreads": 1 + }, + { + "ef": 600, + "numThreads": 1 + }, + { + "ef": 800, + "numThreads": 1 + } ], "search_result_file": "result/deep-100M/hnswlib/M12" }, { "name": "hnswlib.M16", "algo": "hnswlib", - "build_param": {"M":16, "efConstruction":500, "numThreads":32}, + "build_param": { + "M": 16, + "efConstruction": 500, + "numThreads": 32 + }, "file": "index/deep-100M/hnswlib/M16", "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, 
"numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} + { + "ef": 10, + "numThreads": 1 + }, + { + "ef": 20, + "numThreads": 1 + }, + { + "ef": 40, + "numThreads": 1 + }, + { + "ef": 60, + "numThreads": 1 + }, + { + "ef": 80, + "numThreads": 1 + }, + { + "ef": 120, + "numThreads": 1 + }, + { + "ef": 200, + "numThreads": 1 + }, + { + "ef": 400, + "numThreads": 1 + }, + { + "ef": 600, + "numThreads": 1 + }, + { + "ef": 800, + "numThreads": 1 + } ], "search_result_file": "result/deep-100M/hnswlib/M16" }, { "name": "hnswlib.M24", "algo": "hnswlib", - "build_param": {"M":24, "efConstruction":500, "numThreads":32}, + "build_param": { + "M": 24, + "efConstruction": 500, + "numThreads": 32 + }, "file": "index/deep-100M/hnswlib/M24", "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, "numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} + { + "ef": 10, + "numThreads": 1 + }, + { + "ef": 20, + "numThreads": 1 + }, + { + "ef": 40, + "numThreads": 1 + }, + { + "ef": 60, + "numThreads": 1 + }, + { + "ef": 80, + "numThreads": 1 + }, + { + "ef": 120, + "numThreads": 1 + }, + { + "ef": 200, + "numThreads": 1 + }, + { + "ef": 400, + "numThreads": 1 + }, + { + "ef": 600, + "numThreads": 1 + }, + { + "ef": 800, + "numThreads": 1 + } ], "search_result_file": "result/deep-100M/hnswlib/M24" }, { "name": "hnswlib.M36", "algo": "hnswlib", - "build_param": {"M":36, "efConstruction":500, "numThreads":32}, + "build_param": { + "M": 36, + "efConstruction": 500, + "numThreads": 32 + }, "file": "index/deep-100M/hnswlib/M36", "search_params": [ - {"ef":10, "numThreads":1}, - {"ef":20, "numThreads":1}, - {"ef":40, "numThreads":1}, - {"ef":60, 
"numThreads":1}, - {"ef":80, "numThreads":1}, - {"ef":120, "numThreads":1}, - {"ef":200, "numThreads":1}, - {"ef":400, "numThreads":1}, - {"ef":600, "numThreads":1}, - {"ef":800, "numThreads":1} + { + "ef": 10, + "numThreads": 1 + }, + { + "ef": 20, + "numThreads": 1 + }, + { + "ef": 40, + "numThreads": 1 + }, + { + "ef": 60, + "numThreads": 1 + }, + { + "ef": 80, + "numThreads": 1 + }, + { + "ef": 120, + "numThreads": 1 + }, + { + "ef": 200, + "numThreads": 1 + }, + { + "ef": 400, + "numThreads": 1 + }, + { + "ef": 600, + "numThreads": 1 + }, + { + "ef": 800, + "numThreads": 1 + } ], "search_result_file": "result/deep-100M/hnswlib/M36" }, { "name": "faiss_ivf_flat.nlist50K", "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":50000}, + "build_param": { + "nlist": 50000 + }, "file": "index/deep-100M/faiss_ivf_flat/nlist50K", "search_params": [ - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} + { + "nprobe": 20 + }, + { + "nprobe": 30 + }, + { + "nprobe": 40 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } ], "search_result_file": "result/deep-100M/faiss_ivf_flat/nlist50K" }, { "name": "faiss_ivf_flat.nlist100K", "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":100000}, + "build_param": { + "nlist": 100000 + }, "file": "index/deep-100M/faiss_ivf_flat/nlist100K", "search_params": [ - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} + { + "nprobe": 20 + }, + { + "nprobe": 30 + }, + { + "nprobe": 40 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } ], "search_result_file": "result/deep-100M/faiss_ivf_flat/nlist100K" }, { "name": "faiss_ivf_flat.nlist200K", "algo": "faiss_gpu_ivf_flat", - "build_param": {"nlist":200000}, + 
"build_param": { + "nlist": 200000 + }, "file": "index/deep-100M/faiss_ivf_flat/nlist200K", "search_params": [ - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} + { + "nprobe": 20 + }, + { + "nprobe": 30 + }, + { + "nprobe": 40 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } ], "search_result_file": "result/deep-100M/faiss_ivf_flat/nlist200K" }, { "name": "faiss_ivf_pq.M48-nlist16K", "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":16384, "M":48}, + "build_param": { + "nlist": 16384, + "M": 48 + }, "file": "index/deep-100M/faiss_ivf_pq/M48-nlist16K", "search_params": [ - {"nprobe":10}, - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500} + { + "nprobe": 10 + }, + { + "nprobe": 20 + }, + { + "nprobe": 30 + }, + { + "nprobe": 40 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + } ], "search_result_file": "result/deep-100M/faiss_ivf_pq/M48-nlist16K" }, { "name": "faiss_ivf_pq.M48-nlist50K", "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":50000, "M":48}, + "build_param": { + "nlist": 50000, + "M": 48 + }, "file": "index/deep-100M/faiss_ivf_pq/M48-nlist50K", "search_params": [ - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} + { + "nprobe": 20 + }, + { + "nprobe": 30 + }, + { + "nprobe": 40 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } ], "search_result_file": "result/deep-100M/faiss_ivf_pq/M48-nlist50K" }, { "name": "faiss_ivf_pq.M48-nlist100K", "algo": "faiss_gpu_ivf_pq", - "build_param": {"nlist":100000, "M":48}, + "build_param": { + "nlist": 100000, + "M": 48 + }, "file": 
"index/deep-100M/faiss_ivf_pq/M48-nlist100K", "search_params": [ - {"nprobe":20}, - {"nprobe":30}, - {"nprobe":40}, - {"nprobe":50}, - {"nprobe":100}, - {"nprobe":200}, - {"nprobe":500}, - {"nprobe":1000} + { + "nprobe": 20 + }, + { + "nprobe": 30 + }, + { + "nprobe": 40 + }, + { + "nprobe": 50 + }, + { + "nprobe": 100 + }, + { + "nprobe": 200 + }, + { + "nprobe": 500 + }, + { + "nprobe": 1000 + } ], "search_result_file": "result/deep-100M/faiss_ivf_pq/M48-nlist100K" }, @@ -200,14 +447,46 @@ }, "file": "index/deep-100M/ivf_flat/nlist100K", "search_params": [ - {"max_batch":10000, "max_k":10, "nprobe":20}, - {"max_batch":10000, "max_k":10, "nprobe":30}, - {"max_batch":10000, "max_k":10, "nprobe":40}, - {"max_batch":10000, "max_k":10, "nprobe":50}, - {"max_batch":10000, "max_k":10, "nprobe":100}, - {"max_batch":10000, "max_k":10, "nprobe":200}, - {"max_batch":10000, "max_k":10, "nprobe":500}, - {"max_batch":10000, "max_k":10, "nprobe":1000} + { + "max_batch": 10000, + "max_k": 10, + "nprobe": 20 + }, + { + "max_batch": 10000, + "max_k": 10, + "nprobe": 30 + }, + { + "max_batch": 10000, + "max_k": 10, + "nprobe": 40 + }, + { + "max_batch": 10000, + "max_k": 10, + "nprobe": 50 + }, + { + "max_batch": 10000, + "max_k": 10, + "nprobe": 100 + }, + { + "max_batch": 10000, + "max_k": 10, + "nprobe": 200 + }, + { + "max_batch": 10000, + "max_k": 10, + "nprobe": 500 + }, + { + "max_batch": 10000, + "max_k": 10, + "nprobe": 1000 + } ], "search_result_file": "result/deep-100M/ivf_flat/nlist100K" }, @@ -380,7 +659,7 @@ "search_result_file": "result/deep-100M/cagra/dim32_multi_cta" }, { - "name": "raftcagra.dim32.multi_kernel", + "name": "raft_cagra.dim32.multi_kernel", "algo": "raft_cagra", "build_param": { "index_dim": 32, @@ -632,4 +911,4 @@ "search_result_file": "result/deep-100M/cagra_orig/dim32_multi_cta" } ] -} +} \ No newline at end of file diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index 6bb943cec2..28df4640ee 100644 --- 
a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -357,7 +357,6 @@ inline void search(const Dataset* dataset, const std::vector(index.algo, index.search_params[i]); - // std::cout << "Cagra created search_param" << std::endl; algo->set_search_param(*p_param); log_info("search with param: %s", index.search_params[i].dump().c_str()); diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index 4421bdeb04..8b1a2cfc22 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -45,10 +45,10 @@ extern template class raft::bench::ann::RaftIvfPQ; extern template class raft::bench::ann::RaftCagra; extern template class raft::bench::ann::RaftCagra; extern template class raft::bench::ann::RaftCagra; -#include "orig_cagra_wrapper.h" -extern template class raft::bench::ann::Cagra; -extern template class raft::bench::ann::Cagra; -extern template class raft::bench::ann::Cagra; +// #include "orig_cagra_wrapper.h" +// extern template class raft::bench::ann::Cagra; +// extern template class raft::bench::ann::Cagra; +// extern template class raft::bench::ann::Cagra; #endif #define JSON_DIAGNOSTICS 1 #include @@ -163,46 +163,46 @@ void parse_search_param(const nlohmann::json& conf, } } } -template -void parse_build_param(const nlohmann::json& conf, - typename raft::bench::ann::Cagra::BuildParam& param) -{ -} -template -void parse_search_param(const nlohmann::json& conf, - typename raft::bench::ann::Cagra::SearchParam& param) -{ - if (conf.contains("itopk")) { param.p.itopk_size = conf.at("itopk"); } - if (conf.contains("search_width")) { param.p.search_width = conf.at("search_width"); } - if (conf.contains("max_iterations")) { param.p.max_iterations = conf.at("max_iterations"); } - if (conf.contains("algo")) { - if (conf.at("algo") == "single_cta") { - param.p.algo = raft::neighbors::experimental::cagra::search_algo::SINGLE_CTA; - param.search_mode = 
"single-cta"; - } else if (conf.at("algo") == "multi_cta") { - param.p.algo = raft::neighbors::experimental::cagra::search_algo::MULTI_CTA; - param.search_mode = "multi-cta"; - } else if (conf.at("algo") == "multi_kernel") { - param.p.algo = raft::neighbors::experimental::cagra::search_algo::MULTI_KERNEL; - param.search_mode = "multi-kernel"; - } else if (conf.at("algo") == "auto") { - param.p.algo = raft::neighbors::experimental::cagra::search_algo::AUTO; - } else { - std::string tmp = conf.at("algo"); - THROW("Invalid value for algo: %s", tmp.c_str()); - } - } - if (conf.contains("k")) { - param.k = conf.at("k"); - } else { - param.k = 10; - } - if (conf.contains("batch_size")) { - param.batch_size = conf.at("batch_size"); - } else { - param.batch_size = 10000; - }; -} +// template +// void parse_build_param(const nlohmann::json& conf, +// typename raft::bench::ann::Cagra::BuildParam& param) +// { +// } +// template +// void parse_search_param(const nlohmann::json& conf, +// typename raft::bench::ann::Cagra::SearchParam& param) +// { +// if (conf.contains("itopk")) { param.p.itopk_size = conf.at("itopk"); } +// if (conf.contains("search_width")) { param.p.search_width = conf.at("search_width"); } +// if (conf.contains("max_iterations")) { param.p.max_iterations = conf.at("max_iterations"); } +// if (conf.contains("algo")) { +// if (conf.at("algo") == "single_cta") { +// param.p.algo = raft::neighbors::experimental::cagra::search_algo::SINGLE_CTA; +// param.search_mode = "single-cta"; +// } else if (conf.at("algo") == "multi_cta") { +// param.p.algo = raft::neighbors::experimental::cagra::search_algo::MULTI_CTA; +// param.search_mode = "multi-cta"; +// } else if (conf.at("algo") == "multi_kernel") { +// param.p.algo = raft::neighbors::experimental::cagra::search_algo::MULTI_KERNEL; +// param.search_mode = "multi-kernel"; +// } else if (conf.at("algo") == "auto") { +// param.p.algo = raft::neighbors::experimental::cagra::search_algo::AUTO; +// } else { +// 
std::string tmp = conf.at("algo"); +// THROW("Invalid value for algo: %s", tmp.c_str()); +// } +// } +// if (conf.contains("k")) { +// param.k = conf.at("k"); +// } else { +// param.k = 10; +// } +// if (conf.contains("batch_size")) { +// param.batch_size = conf.at("batch_size"); +// } else { +// param.batch_size = 10000; +// }; +// } #endif template @@ -248,11 +248,11 @@ std::unique_ptr> create_algo(const std::string& algo, parse_build_param(conf, param); ann = std::make_unique>(metric, dim, param); } - if (algo == "cagra") { - typename raft::bench::ann::Cagra::BuildParam param; - parse_build_param(conf, param); - ann = std::make_unique>(metric, dim, param); - } + // if (algo == "cagra") { + // typename raft::bench::ann::Cagra::BuildParam param; + // parse_build_param(conf, param); + // ann = std::make_unique>(metric, dim, param); + // } #endif if (!ann) { throw std::runtime_error("invalid algo: '" + algo + "'"); } @@ -291,11 +291,11 @@ std::unique_ptr::AnnSearchParam> create_search parse_search_param(conf, *param); return param; } - if (algo == "cagra") { - auto param = std::make_unique::SearchParam>(); - parse_search_param(conf, *param); - return param; - } + // if (algo == "cagra") { + // auto param = std::make_unique::SearchParam>(); + // parse_search_param(conf, *param); + // return param; + // } #endif // else throw std::runtime_error("invalid algo: '" + algo + "'"); diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index 0246260340..df3bd27cbf 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -76,7 +76,6 @@ class RaftCagra : public ANN { { AlgoProperty property; property.dataset_memory_type = MemoryType::HostMmap; - property.dataset_memory_type = MemoryType::HostMmap; property.query_memory_type = MemoryType::Device; property.need_dataset_when_search = true; return property; diff --git a/cpp/include/raft/neighbors/cagra_types.hpp 
b/cpp/include/raft/neighbors/cagra_types.hpp index 2583afdaa9..1f50dd1f03 100644 --- a/cpp/include/raft/neighbors/cagra_types.hpp +++ b/cpp/include/raft/neighbors/cagra_types.hpp @@ -34,6 +34,10 @@ #include #include +#include "cuda_pinned_resource.hpp" +#include +#include + #include namespace raft::neighbors::cagra { /** @@ -165,9 +169,12 @@ struct index : ann::index { /** Construct an empty index. */ index(raft::resources const& res) : ann::index(), + mr_(new rmm::mr::cuda_pinned_resource()), metric_(raft::distance::DistanceType::L2Expanded), dataset_(make_device_matrix(res, 0, 0)), - graph_(make_device_matrix(res, 0, 0)) + dataset_pinned_(0, resource::get_cuda_stream(res), mr_.get()), + graph_(make_device_matrix(res, 0, 0)), + graph_pinned_(0, resource::get_cuda_stream(res), mr_.get()) { } @@ -229,16 +236,61 @@ struct index : ann::index { index(raft::resources const& res, raft::distance::DistanceType metric, mdspan, row_major, data_accessor> dataset, - mdspan, row_major, graph_accessor> knn_graph) + mdspan, row_major, graph_accessor> knn_graph, + bool graph_pinned = true, + bool data_pinned = true) : ann::index(), + mr_(new rmm::mr::cuda_pinned_resource()), metric_(metric), dataset_(make_device_matrix(res, 0, 0)), - graph_(make_device_matrix(res, 0, 0)) + dataset_pinned_(0, resource::get_cuda_stream(res), mr_.get()), + graph_(make_device_matrix(res, 0, 0)), + graph_pinned_(0, resource::get_cuda_stream(res), mr_.get()) { RAFT_EXPECTS(dataset.extent(0) == knn_graph.extent(0), "Dataset and knn_graph must have equal number of rows"); - update_dataset(res, dataset); - update_graph(res, knn_graph); + if (data_pinned) { + // copy with padding + int64_t aligned_dim = AlignDim::roundUp(dataset.extent(1)); + dataset_pinned_.resize(dataset.extent(0) * aligned_dim, resource::get_cuda_stream(res)); + resource::sync_stream(res); + + RAFT_LOG_INFO("Allocated pinned dataset"); + + memset(dataset_pinned_.data(), 0, dataset_pinned_.size() * sizeof(T)); + 
RAFT_CUDA_TRY(cudaMemcpy2DAsync(dataset_pinned_.data(), + sizeof(T) * aligned_dim, + dataset.data_handle(), + sizeof(T) * dataset.extent(1), + sizeof(T) * dataset.extent(1), + dataset.extent(0), + cudaMemcpyDefault, + resource::get_cuda_stream(res))); + + dataset_view_ = make_device_strided_matrix_view( + dataset_pinned_.data(), dataset.extent(0), dataset.extent(1), aligned_dim); + RAFT_LOG_INFO("CAGRA dataset strided matrix view %zux%zu, stride %zu", + static_cast(dataset_view_.extent(0)), + static_cast(dataset_view_.extent(1)), + static_cast(dataset_view_.stride(0))); + } else { + update_dataset(res, dataset); + } + if (graph_pinned) { + graph_pinned_.resize(knn_graph.size(), resource::get_cuda_stream(res)); + resource::sync_stream(res); + RAFT_LOG_INFO("Allocated pinned graph"); + + memset(graph_pinned_.data(), 0, sizeof(IdxT) * graph_pinned_.size()); + graph_view_ = make_device_matrix_view( + graph_pinned_.data(), knn_graph.extent(0), knn_graph.extent(1)); + raft::copy(graph_pinned_.data(), + knn_graph.data_handle(), + knn_graph.size(), + resource::get_cuda_stream(res)); + } else { + update_graph(res, knn_graph); + } resource::sync_stream(res); } @@ -336,9 +388,13 @@ struct index : ann::index { static_cast(dataset_view_.stride(0))); } + private: + std::unique_ptr mr_; raft::distance::DistanceType metric_; raft::device_matrix dataset_; + rmm::device_uvector dataset_pinned_; raft::device_matrix graph_; + rmm::device_uvector graph_pinned_; raft::device_matrix_view dataset_view_; raft::device_matrix_view graph_view_; }; diff --git a/cpp/include/raft/neighbors/cuda_pinned_resource.hpp b/cpp/include/raft/neighbors/cuda_pinned_resource.hpp new file mode 100644 index 0000000000..2d0cb602be --- /dev/null +++ b/cpp/include/raft/neighbors/cuda_pinned_resource.hpp @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include +#include + +#include + +namespace rmm::mr { +/** + * @brief `device_memory_resource` derived class that uses cudaMallocHost/Free for + * allocation/deallocation. + */ +class cuda_pinned_resource final : public device_memory_resource { + public: + cuda_pinned_resource() = default; + ~cuda_pinned_resource() override = default; + cuda_pinned_resource(cuda_pinned_resource const&) = default; + cuda_pinned_resource(cuda_pinned_resource&&) = default; + cuda_pinned_resource& operator=(cuda_pinned_resource const&) = default; + cuda_pinned_resource& operator=(cuda_pinned_resource&&) = default; + + /** + * @brief Query whether the resource supports use of non-null CUDA streams for + * allocation/deallocation. `cuda_pinned_resource` does not support streams. + * + * @returns bool false + */ + [[nodiscard]] bool supports_streams() const noexcept override { return false; } + + /** + * @brief Query whether the resource supports the get_mem_info API. + * + * @return true + */ + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; } + + private: + /** + * @brief Allocates memory of size at least `bytes` using cudaMalloc. + * + * The returned pointer has at least 256B alignment. 
+ * + * @note Stream argument is ignored + * + * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled + * + * @param bytes The size, in bytes, of the allocation + * @return void* Pointer to the newly allocated memory + */ + void* do_allocate(std::size_t bytes, cuda_stream_view) override + { + void* ptr{nullptr}; + RMM_CUDA_TRY_ALLOC(cudaMallocHost(&ptr, bytes)); + return ptr; + } + + /** + * @brief Deallocate memory pointed to by \p p. + * + * @note Stream argument is ignored. + * + * @throws Nothing. + * + * @param p Pointer to be deallocated + */ + void do_deallocate(void* ptr, std::size_t, cuda_stream_view) override + { + RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); + } + + /** + * @brief Compare this resource to another. + * + * Two cuda_pinned_resources always compare equal, because they can each + * deallocate memory allocated by the other. + * + * @throws Nothing. + * + * @param other The other resource to compare to + * @return true If the two resources are equivalent + * @return false If the two resources are not equal + */ + [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override + { + return dynamic_cast(&other) != nullptr; + } + + /** + * @brief Get free and available memory for memory resource + * + * @throws `rmm::cuda_error` if unable to retrieve memory info. 
+ * + * @return std::pair contaiing free_size and total_size of memory + */ + [[nodiscard]] std::pair do_get_mem_info(cuda_stream_view) const override + { + std::size_t free_size{}; + std::size_t total_size{}; + RMM_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size)); + return std::make_pair(free_size, total_size); + } +}; +} // namespace rmm::mr \ No newline at end of file diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh index 8d040c352b..c8b75e8168 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh @@ -36,7 +36,7 @@ struct check_index_layout { "paste in the new size and consider updating the serialization logic"); }; -constexpr size_t expected_size = 200; +constexpr size_t expected_size = 288; template struct check_index_layout), expected_size>; /** From bded6749736d6de3daaba265d5da20946be1a80e Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Mon, 11 Sep 2023 16:21:01 -0400 Subject: [PATCH 05/57] Adding FAISS cpu to raft-ann-bench --- build.sh | 2 +- cpp/bench/ann/CMakeLists.txt | 27 ++ .../ann/src/faiss/faiss_cpu_benchmark.cpp | 161 +++++++++ cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h | 329 ++++++++++++++++++ cpp/bench/ann/src/faiss/faiss_wrapper.h | 6 +- .../ann/src/hnswlib/hnswlib_benchmark.cpp | 1 - cpp/cmake/thirdparty/get_faiss.cmake | 2 +- 7 files changed, 520 insertions(+), 8 deletions(-) create mode 100644 cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp create mode 100644 cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h diff --git a/build.sh b/build.sh index 071820ba93..b2f3c15365 100755 --- a/build.sh +++ b/build.sh @@ -67,7 +67,7 @@ BUILD_DIRS="${LIBRAFT_BUILD_DIR} ${PYLIBRAFT_BUILD_DIR} ${RAFT_DASK_BUILD_DIR}" # Set defaults for vars modified by flags to this script CMAKE_LOG_LEVEL="" VERBOSE_FLAG="" -BUILD_ALL_GPU_ARCH=0 +BUILD_ALL_GPU_ARCH=1 BUILD_TESTS=OFF BUILD_TYPE=Release BUILD_PRIMS_BENCH=OFF diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 8985be328b..55e0838acd 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -18,6 +18,13 @@ option(RAFT_ANN_BENCH_USE_FAISS_BFKNN "Include faiss' brute-force knn algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT "Include faiss' ivf flat algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ "Include faiss' ivf pq algorithm in benchmark" ON) +option(RAFT_ANN_BENCH_USE_FAISS_CPU_BFKNN + "Include faiss' cpu brute-force knn algorithm in benchmark" ON +) +option(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT "Include faiss' cpu ivf flat algorithm in benchmark" + ON +) +option(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_PQ "Include faiss' cpu ivf pq algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT "Include raft's ivf flat algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ "Include raft's ivf pq algorithm in benchmark" ON) 
option(RAFT_ANN_BENCH_USE_RAFT_CAGRA "Include raft's CAGRA in benchmark" ON) @@ -58,6 +65,14 @@ if(RAFT_ANN_BENCH_USE_FAISS_BFKNN set(RAFT_ANN_BENCH_USE_FAISS ON) endif() +set(RAFT_ANN_BENCH_USE_FAISS_CPU OFF) +if(RAFT_ANN_BENCH_USE_FAISS_CPU_BFKNN + OR RAFT_ANN_BENCH_USE_FAISS_CPU_IVFPQ + OR RAFT_ANN_BENCH_USE_FAISS_CPU_IFFLAT +) + set(RAFT_ANN_BENCH_USE_FAISS_CPU ON) +endif() + set(RAFT_ANN_BENCH_USE_RAFT OFF) if(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ OR RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT @@ -219,6 +234,18 @@ if(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT) ) endif() +if(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT) + ConfigureAnnBench( + NAME FAISS_CPU_IVF_FLAT PATH bench/ann/src/faiss/faiss_benchmark.cpp LINKS faiss::faiss + ) +endif() + +if(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT) + ConfigureAnnBench( + NAME FAISS_CPU_IVF_FLAT PATH bench/ann/src/faiss/faiss_benchmark.cpp LINKS faiss::faiss + ) +endif() + if(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ) ConfigureAnnBench( NAME FAISS_IVF_PQ PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS faiss::faiss diff --git a/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp b/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp new file mode 100644 index 0000000000..1be39b3de2 --- /dev/null +++ b/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "../common/ann_types.hpp" +#include "faiss_cpu_wrapper.h" +#define JSON_DIAGNOSTICS 1 +#include + +namespace raft::bench::ann { + +template +void parse_base_build_param(const nlohmann::json& conf, + typename raft::bench::ann::Faiss::BuildParam& param) +{ + param.nlist = conf.at("nlist"); + if (conf.contains("ratio")) { param.ratio = conf.at("ratio"); } +} + +template +void parse_build_param(const nlohmann::json& conf, + typename raft::bench::ann::FaissIVFFlat::BuildParam& param) +{ + parse_base_build_param(conf, param); +} + +template +void parse_build_param(const nlohmann::json& conf, + typename raft::bench::ann::FaissIVFPQ::BuildParam& param) +{ + parse_base_build_param(conf, param); + param.M = conf.at("M"); + if (conf.contains("usePrecomputed")) { + param.usePrecomputed = conf.at("usePrecomputed"); + } else { + param.usePrecomputed = false; + } + if (conf.contains("useFloat16")) { + param.useFloat16 = conf.at("useFloat16"); + } else { + param.useFloat16 = false; + } +} + +template +void parse_build_param(const nlohmann::json& conf, + typename raft::bench::ann::FaissIVFSQ::BuildParam& param) +{ + parse_base_build_param(conf, param); + param.quantizer_type = conf.at("quantizer_type"); +} + +template +void parse_search_param(const nlohmann::json& conf, + typename raft::bench::ann::Faiss::SearchParam& param) +{ + param.nprobe = conf.at("nprobe"); + if (conf.contains("refine_ratio")) { param.refine_ratio = conf.at("refine_ratio"); } +} + +template class Algo> +std::unique_ptr> make_algo(raft::bench::ann::Metric metric, + int dim, + const nlohmann::json& conf) +{ + typename Algo::BuildParam param; + parse_build_param(conf, param); + return std::make_unique>(metric, dim, param); +} + +template class Algo> +std::unique_ptr> make_algo(raft::bench::ann::Metric metric, + int dim, + const nlohmann::json& conf, + const std::vector& dev_list) +{ + typename Algo::BuildParam param; + 
parse_build_param(conf, param); + + (void)dev_list; + return std::make_unique>(metric, dim, param); +} + +template +std::unique_ptr> create_algo(const std::string& algo, + const std::string& distance, + int dim, + const nlohmann::json& conf, + const std::vector& dev_list) +{ + // stop compiler warning; not all algorithms support multi-GPU so it may not be used + (void)dev_list; + + std::unique_ptr> ann; + + if constexpr (std::is_same_v) { + raft::bench::ann::Metric metric = parse_metric(distance); + if (algo == "faiss_ivf_flat") { + ann = make_algo(metric, dim, conf, dev_list); + } else if (algo == "faiss_ivf_pq") { + ann = make_algo(metric, dim, conf); + } else if (algo == "faiss_ivf_sq") { + ann = make_algo(metric, dim, conf); + } else if (algo == "faiss_flat") { + ann = std::make_unique>(metric, dim); + } + } + + if constexpr (std::is_same_v) {} + + if (!ann) { throw std::runtime_error("invalid algo: '" + algo + "'"); } + + return ann; +} + +template +std::unique_ptr::AnnSearchParam> create_search_param( + const std::string& algo, const nlohmann::json& conf) +{ + if (algo == "faiss_ivf_flat" || algo == "faiss_ivf_pq" || algo == "faiss_ivf_sq") { + auto param = std::make_unique::SearchParam>(); + parse_search_param(conf, *param); + return param; + } else if (algo == "faiss_flat") { + auto param = std::make_unique::AnnSearchParam>(); + return param; + } + // else + throw std::runtime_error("invalid algo: '" + algo + "'"); +} + +} // namespace raft::bench::ann + +REGISTER_ALGO_INSTANCE(float); +REGISTER_ALGO_INSTANCE(std::int8_t); +REGISTER_ALGO_INSTANCE(std::uint8_t); + +#ifdef ANN_BENCH_BUILD_MAIN +#include "../common/benchmark.hpp" +int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } +#endif diff --git a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h new file mode 100644 index 0000000000..7c62e34204 --- /dev/null +++ b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h @@ -0,0 +1,329 @@ +/* + * 
Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "../common/ann_types.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace { + +faiss::MetricType parse_metric_type(raft::bench::ann::Metric metric) +{ + if (metric == raft::bench::ann::Metric::kInnerProduct) { + return faiss::METRIC_INNER_PRODUCT; + } else if (metric == raft::bench::ann::Metric::kEuclidean) { + return faiss::METRIC_L2; + } else { + throw std::runtime_error("faiss supports only metric type of inner product and L2"); + } +} + +// note BLAS library can still use multi-threading, and +// setting environment variable like OPENBLAS_NUM_THREADS can control it +class OmpSingleThreadScope { + public: + OmpSingleThreadScope() + { + max_threads_ = omp_get_max_threads(); + omp_set_num_threads(1); + } + ~OmpSingleThreadScope() + { + // the best we can do + omp_set_num_threads(max_threads_); + } + + private: + int max_threads_; +}; + +} // namespace + +namespace raft::bench::ann { + +template +class Faiss : public ANN { + public: + using typename ANN::AnnSearchParam; + struct SearchParam : public AnnSearchParam { + int nprobe; + float refine_ratio = 1.0; + }; + + struct BuildParam { + int nlist = 1; + int ratio = 2; + }; + + Faiss(Metric metric, int dim, const BuildParam& param) + : ANN(metric, dim), + 
metric_type_(parse_metric_type(metric)), + nlist_{param.nlist}, + training_sample_fraction_{1.0 / double(param.ratio)} + { + static_assert(std::is_same_v, "faiss support only float type"); + } + + virtual ~Faiss() noexcept {} + + void build(const T* dataset, size_t nrow, cudaStream_t stream = 0) final; + + void set_search_param(const AnnSearchParam& param) override; + + // TODO: if the number of results is less than k, the remaining elements of 'neighbors' + // will be filled with (size_t)-1 + void search(const T* queries, + int batch_size, + int k, + size_t* neighbors, + float* distances, + cudaStream_t stream = 0) const final; + + AlgoProperty get_preference() const override + { + AlgoProperty property; + // to enable building big dataset which is larger than memory + property.dataset_memory_type = MemoryType::Host; + property.query_memory_type = MemoryType::Host; + return property; + } + + protected: + template + void save_(const std::string& file) const; + + template + void load_(const std::string& file); + + std::unique_ptr index_; + std::unique_ptr index_refine_; + faiss::MetricType metric_type_; + int nlist_; + double training_sample_fraction_; +}; + +template +void Faiss::build(const T* dataset, size_t nrow, cudaStream_t stream) +{ + OmpSingleThreadScope omp_single_thread; + auto index_ivf = dynamic_cast(index_.get()); + if (index_ivf != nullptr) { + // set the min/max training size for clustering to use the whole provided training set. + double trainset_size = training_sample_fraction_ * static_cast(nrow); + double points_per_centroid = trainset_size / static_cast(nlist_); + int max_ppc = std::ceil(points_per_centroid); + int min_ppc = std::floor(points_per_centroid); + if (min_ppc < index_ivf->cp.min_points_per_centroid) { + RAFT_LOG_WARN( + "The suggested training set size %zu (data size %zu, training sample ratio %f) yields %d " + "points per cluster (n_lists = %d). 
This is smaller than the FAISS default " + "min_points_per_centroid = %d.", + static_cast(trainset_size), + nrow, + training_sample_fraction_, + min_ppc, + nlist_, + index_ivf->cp.min_points_per_centroid); + } + index_ivf->cp.max_points_per_centroid = max_ppc; + index_ivf->cp.min_points_per_centroid = min_ppc; + } + index_->train(nrow, dataset); // faiss::IndexFlat::train() will do nothing + assert(index_->is_trained); + index_->add(nrow, dataset); + stream_wait(stream); +} + +template +void Faiss::set_search_param(const AnnSearchParam& param) +{ + auto search_param = dynamic_cast(param); + int nprobe = search_param.nprobe; + assert(nprobe <= nlist_); + dynamic_cast(index_.get())->setNumProbes(nprobe); + + if (search_param.refine_ratio > 1.0) { + this->index_refine_ = std::make_unique(this->index_.get()); + this->index_refine_.get()->k_factor = search_param.refine_ratio; + } +} + +template +void Faiss::search(const T* queries, + int batch_size, + int k, + size_t* neighbors, + float* distances, + cudaStream_t stream) const +{ + static_assert(sizeof(size_t) == sizeof(faiss::Index::idx_t), + "sizes of size_t and faiss::Index::idx_t are different"); + index_->search( + batch_size, queries, k, distances, reinterpret_cast(neighbors)); +} + +template +template +void Faiss::save_(const std::string& file) const +{ + OmpSingleThreadScope omp_single_thread; + + auto cpu_index = std::make_unique(); + dynamic_cast(index_.get())->copyTo(cpu_index.get()); + faiss::write_index(cpu_index.get(), file.c_str()); +} + +template +template +void Faiss::load_(const std::string& file) +{ + OmpSingleThreadScope omp_single_thread; + + index_ = std::make_unique(dynamic_cast(faiss::read_index(file.c_str()))); +} + +template +class FaissIVFFlat : public Faiss { + public: + using typename Faiss::BuildParam; + + FaissIVFFlat(Metric metric, int dim, const BuildParam& param) : Faiss(metric, dim, param) + { + faiss::IndexIVFFlatConfig config; + config.device = this->device_; + this->index_ = 
std::make_unique( + &(this->_resource_), dim, param.nlist, this->metric_type_, config); + } + + void save(const std::string& file) const override + { + this->template save_(file); + } + void load(const std::string& file) override + { + this->template load_(file); + } +}; + +template +class FaissIVFPQ : public Faiss { + public: + struct BuildParam : public Faiss::BuildParam { + int M; + bool useFloat16; + bool usePrecomputed; + }; + + FaissIVFPQ(Metric metric, int dim, const BuildParam& param) : Faiss(metric, dim, param) + { + faiss::IndexIVFPQConfig config; + config.useFloat16LookupTables = param.useFloat16; + config.usePrecomputedTables = param.usePrecomputed; + config.device = this->device_; + this->index_ = std::make_unique(&(this->_resource_), + dim, + param.nlist, + param.M, + 8, // FAISS only supports bitsPerCode=8 + this->metric_type_, + config); + } + + void save(const std::string& file) const override + { + this->template save_(file); + } + void load(const std::string& file) override + { + this->template load_(file); + } +}; + +template +class FaissIVFSQ : public Faiss { + public: + struct BuildParam : public Faiss::BuildParam { + std::string quantizer_type; + }; + + FaissIVFSQ(Metric metric, int dim, const BuildParam& param) : Faiss(metric, dim, param) + { + faiss::ScalarQuantizer::QuantizerType qtype; + if (param.quantizer_type == "fp16") { + qtype = faiss::ScalarQuantizer::QT_fp16; + } else if (param.quantizer_type == "int8") { + qtype = faiss::ScalarQuantizer::QT_8bit; + } else { + throw std::runtime_error("FaissIVFSQ supports only fp16 and int8 but got " + + param.quantizer_type); + } + + faiss::IndexIVFScalarQuantizerConfig config; + config.device = this->device_; + this->index_ = std::make_unique( + &(this->_resource_), dim, param.nlist, qtype, this->metric_type_, true, config); + } + + void save(const std::string& file) const override + { + this->template save_(file); + } + void load(const std::string& file) override + { + this->template 
load_(file); + } +}; + +template +class FaissFlat : public Faiss { + public: + FaissFlat(Metric metric, int dim) : Faiss(metric, dim, typename Faiss::BuildParam{}) + { + faiss::IndexFlatConfig config; + config.device = this->device_; + this->index_ = + std::make_unique(&(this->_resource_), dim, this->metric_type_, config); + } + + // class Faiss is more like a IVF class, so need special treating here + void set_search_param(const typename ANN::AnnSearchParam&) override{}; + + void save(const std::string& file) const override + { + this->template save_(file); + } + void load(const std::string& file) override + { + this->template load_(file); + } +}; + +} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/faiss/faiss_wrapper.h b/cpp/bench/ann/src/faiss/faiss_wrapper.h index 672c685b1f..a426ad2df7 100644 --- a/cpp/bench/ann/src/faiss/faiss_wrapper.h +++ b/cpp/bench/ann/src/faiss/faiss_wrapper.h @@ -13,9 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#ifndef FAISS_WRAPPER_H_ -#define FAISS_WRAPPER_H_ - +#pragma once #include "../common/ann_types.hpp" #include @@ -355,5 +353,3 @@ class FaissGpuFlat : public FaissGpu { }; } // namespace raft::bench::ann - -#endif diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp b/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp index 7d96e54989..1af19a22cb 100644 --- a/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp +++ b/cpp/bench/ann/src/hnswlib/hnswlib_benchmark.cpp @@ -24,7 +24,6 @@ #include #include -#undef WARP_SIZE #include "hnswlib_wrapper.h" #define JSON_DIAGNOSTICS 1 #include diff --git a/cpp/cmake/thirdparty/get_faiss.cmake b/cpp/cmake/thirdparty/get_faiss.cmake index b7c132f2f1..6cdad23c69 100644 --- a/cpp/cmake/thirdparty/get_faiss.cmake +++ b/cpp/cmake/thirdparty/get_faiss.cmake @@ -84,4 +84,4 @@ find_and_configure_faiss(VERSION 1.7.0 REPOSITORY ${RAFT_FAISS_GIT_REPOSITORY} PINNED_TAG ${RAFT_FAISS_GIT_TAG} BUILD_STATIC_LIBS ${RAFT_USE_FAISS_STATIC} - EXCLUDE_FROM_ALL ${RAFT_EXCLUDE_FAISS_FROM_ALL}) \ No newline at end of file + EXCLUDE_FROM_ALL ${RAFT_EXCLUDE_FAISS_FROM_ALL}) From f0e3c8f69e932d3f96a36ef5fe0bb3effeb6573f Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Tue, 12 Sep 2023 16:33:11 -0400 Subject: [PATCH 06/57] Adding faiss cpu indexes and build --- cpp/bench/ann/CMakeLists.txt | 19 ++-- .../ann/src/faiss/faiss_cpu_benchmark.cpp | 6 +- cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h | 88 ++++++++----------- cpp/cmake/thirdparty/get_faiss.cmake | 8 +- 4 files changed, 53 insertions(+), 68 deletions(-) diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 55e0838acd..bbceba8bd3 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -40,6 +40,7 @@ option(RAFT_ANN_BENCH_SINGLE_EXE find_package(Threads REQUIRED) if(BUILD_CPU_ONLY) + set(RAFT_FAISS_ENABLE_GPU OFF) set(RAFT_ANN_BENCH_USE_FAISS_BFKNN OFF) set(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT OFF) set(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ OFF) @@ -51,9 +52,12 @@ else() # Disable faiss benchmarks on CUDA 12 since faiss is not yet CUDA 12-enabled. # https://github.com/rapidsai/raft/issues/1627 if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0.0) + set(RAFT_FAISS_ENABLE_GPU OFF) set(RAFT_ANN_BENCH_USE_FAISS_BFKNN OFF) set(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT OFF) set(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ OFF) + else() + set(RAFT_FAISS_ENABLE_GPU ON) endif() endif() @@ -61,16 +65,11 @@ set(RAFT_ANN_BENCH_USE_FAISS OFF) if(RAFT_ANN_BENCH_USE_FAISS_BFKNN OR RAFT_ANN_BENCH_USE_FAISS_IVFPQ OR RAFT_ANN_BENCH_USE_FAISS_IFFLAT -) - set(RAFT_ANN_BENCH_USE_FAISS ON) -endif() - -set(RAFT_ANN_BENCH_USE_FAISS_CPU OFF) -if(RAFT_ANN_BENCH_USE_FAISS_CPU_BFKNN + OR RAFT_ANN_BENCH_USE_FAISS_CPU_BFKNN OR RAFT_ANN_BENCH_USE_FAISS_CPU_IVFPQ OR RAFT_ANN_BENCH_USE_FAISS_CPU_IFFLAT ) - set(RAFT_ANN_BENCH_USE_FAISS_CPU ON) + set(RAFT_ANN_BENCH_USE_FAISS ON) endif() set(RAFT_ANN_BENCH_USE_RAFT OFF) @@ -236,13 +235,13 @@ endif() if(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT) ConfigureAnnBench( - NAME FAISS_CPU_IVF_FLAT PATH bench/ann/src/faiss/faiss_benchmark.cpp LINKS faiss::faiss + NAME FAISS_CPU_IVF_FLAT PATH bench/ann/src/faiss/faiss_cpu_benchmark.cpp 
LINKS faiss::faiss ) endif() -if(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT) +if(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_PQ) ConfigureAnnBench( - NAME FAISS_CPU_IVF_FLAT PATH bench/ann/src/faiss/faiss_benchmark.cpp LINKS faiss::faiss + NAME FAISS_CPU_IVF_PQ PATH bench/ann/src/faiss/faiss_cpu_benchmark.cpp LINKS faiss::faiss ) endif() diff --git a/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp b/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp index 1be39b3de2..7d1ba726bb 100644 --- a/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp +++ b/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp @@ -55,10 +55,10 @@ void parse_build_param(const nlohmann::json& conf, } else { param.usePrecomputed = false; } - if (conf.contains("useFloat16")) { - param.useFloat16 = conf.at("useFloat16"); + if (conf.contains("bitsPerCode")) { + param.bitsPerCode = conf.at("bitsPerCode"); } else { - param.useFloat16 = false; + param.bitsPerCode = 8; } } diff --git a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h index 7c62e34204..4ebd22ec0f 100644 --- a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h +++ b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h @@ -17,6 +17,7 @@ #include "../common/ann_types.hpp" +#define FMT_HEADER_ONLY #include #include @@ -98,6 +99,15 @@ class Faiss : public ANN { void set_search_param(const AnnSearchParam& param) override; + void init_quantizer(int dim) + { + if (this->metric_type_ == faiss::MetricType::METRIC_L2) { + this->quantizer_ = std::make_unique(dim); + } else if (this->metric_type_ == faiss::MetricType::METRIC_INNER_PRODUCT) { + this->quantizer_ = std::make_unique(dim); + } + } + // TODO: if the number of results is less than k, the remaining elements of 'neighbors' // will be filled with (size_t)-1 void search(const T* queries, @@ -117,13 +127,14 @@ class Faiss : public ANN { } protected: - template + template void save_(const std::string& file) const; - template + template void load_(const std::string& file); std::unique_ptr index_; + 
std::unique_ptr quantizer_; std::unique_ptr index_refine_; faiss::MetricType metric_type_; int nlist_; @@ -159,7 +170,6 @@ void Faiss::build(const T* dataset, size_t nrow, cudaStream_t stream) index_->train(nrow, dataset); // faiss::IndexFlat::train() will do nothing assert(index_->is_trained); index_->add(nrow, dataset); - stream_wait(stream); } template @@ -168,7 +178,7 @@ void Faiss::set_search_param(const AnnSearchParam& param) auto search_param = dynamic_cast(param); int nprobe = search_param.nprobe; assert(nprobe <= nlist_); - dynamic_cast(index_.get())->setNumProbes(nprobe); + dynamic_cast(index_.get())->nprobe = nprobe; if (search_param.refine_ratio > 1.0) { this->index_refine_ = std::make_unique(this->index_.get()); @@ -191,14 +201,11 @@ void Faiss::search(const T* queries, } template -template +template void Faiss::save_(const std::string& file) const { OmpSingleThreadScope omp_single_thread; - - auto cpu_index = std::make_unique(); - dynamic_cast(index_.get())->copyTo(cpu_index.get()); - faiss::write_index(cpu_index.get(), file.c_str()); + faiss::write_index(index_.get(), file.c_str()); } template @@ -206,8 +213,7 @@ template void Faiss::load_(const std::string& file) { OmpSingleThreadScope omp_single_thread; - - index_ = std::make_unique(dynamic_cast(faiss::read_index(file.c_str()))); + index_ = std::unique_ptr(dynamic_cast(faiss::read_index(file.c_str()))); } template @@ -217,20 +223,16 @@ class FaissIVFFlat : public Faiss { FaissIVFFlat(Metric metric, int dim, const BuildParam& param) : Faiss(metric, dim, param) { - faiss::IndexIVFFlatConfig config; - config.device = this->device_; - this->index_ = std::make_unique( - &(this->_resource_), dim, param.nlist, this->metric_type_, config); + this->init_quantizer(dim); + this->index_ = std::make_unique( + this->quantizer_.get(), dim, param.nlist, this->metric_type_); } void save(const std::string& file) const override { - this->template save_(file); - } - void load(const std::string& file) override - { - 
this->template load_(file); + this->template save_(file); } + void load(const std::string& file) override { this->template load_(file); } }; template @@ -238,33 +240,22 @@ class FaissIVFPQ : public Faiss { public: struct BuildParam : public Faiss::BuildParam { int M; - bool useFloat16; + int bitsPerCode; bool usePrecomputed; }; FaissIVFPQ(Metric metric, int dim, const BuildParam& param) : Faiss(metric, dim, param) { - faiss::IndexIVFPQConfig config; - config.useFloat16LookupTables = param.useFloat16; - config.usePrecomputedTables = param.usePrecomputed; - config.device = this->device_; - this->index_ = std::make_unique(&(this->_resource_), - dim, - param.nlist, - param.M, - 8, // FAISS only supports bitsPerCode=8 - this->metric_type_, - config); + this->init_quantizer(dim); + this->index_ = std::make_unique( + this->quantizer_.get(), dim, param.nlist, param.M, param.bitsPerCode, this->metric_type_); } void save(const std::string& file) const override { - this->template save_(file); - } - void load(const std::string& file) override - { - this->template load_(file); + this->template save_(file); } + void load(const std::string& file) override { this->template load_(file); } }; template @@ -286,19 +277,18 @@ class FaissIVFSQ : public Faiss { param.quantizer_type); } - faiss::IndexIVFScalarQuantizerConfig config; - config.device = this->device_; - this->index_ = std::make_unique( - &(this->_resource_), dim, param.nlist, qtype, this->metric_type_, true, config); + this->init_quantizer(dim); + this->index_ = std::make_unique( + this->quantizer_.get(), dim, param.nlist, qtype, this->metric_type_, true); } void save(const std::string& file) const override { - this->template save_(file); + this->template save_(file); } void load(const std::string& file) override { - this->template load_(file); + this->template load_(file); } }; @@ -307,10 +297,7 @@ class FaissFlat : public Faiss { public: FaissFlat(Metric metric, int dim) : Faiss(metric, dim, typename Faiss::BuildParam{}) { 
- faiss::IndexFlatConfig config; - config.device = this->device_; - this->index_ = - std::make_unique(&(this->_resource_), dim, this->metric_type_, config); + this->index_ = std::make_unique(dim, this->metric_type_); } // class Faiss is more like a IVF class, so need special treating here @@ -318,12 +305,9 @@ class FaissFlat : public Faiss { void save(const std::string& file) const override { - this->template save_(file); - } - void load(const std::string& file) override - { - this->template load_(file); + this->template save_(file); } + void load(const std::string& file) override { this->template load_(file); } }; } // namespace raft::bench::ann diff --git a/cpp/cmake/thirdparty/get_faiss.cmake b/cpp/cmake/thirdparty/get_faiss.cmake index 6cdad23c69..183dd0007a 100644 --- a/cpp/cmake/thirdparty/get_faiss.cmake +++ b/cpp/cmake/thirdparty/get_faiss.cmake @@ -15,7 +15,7 @@ #============================================================================= function(find_and_configure_faiss) - set(oneValueArgs VERSION REPOSITORY PINNED_TAG BUILD_STATIC_LIBS EXCLUDE_FROM_ALL) + set(oneValueArgs VERSION REPOSITORY PINNED_TAG BUILD_STATIC_LIBS EXCLUDE_FROM_ALL ENABLE_GPU) cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN} ) @@ -39,7 +39,7 @@ function(find_and_configure_faiss) OPTIONS "FAISS_ENABLE_PYTHON OFF" "CUDAToolkit_ROOT ${CUDAToolkit_LIBRARY_DIR}" - "FAISS_ENABLE_GPU ON" + "FAISS_ENABLE_GPU ${PKG_ENABLE_GPU}" "BUILD_TESTING OFF" "CMAKE_MESSAGE_LOG_LEVEL VERBOSE" "FAISS_USE_CUDA_TOOLKIT_STATIC ${CUDA_STATIC_RUNTIME}" @@ -84,4 +84,6 @@ find_and_configure_faiss(VERSION 1.7.0 REPOSITORY ${RAFT_FAISS_GIT_REPOSITORY} PINNED_TAG ${RAFT_FAISS_GIT_TAG} BUILD_STATIC_LIBS ${RAFT_USE_FAISS_STATIC} - EXCLUDE_FROM_ALL ${RAFT_EXCLUDE_FAISS_FROM_ALL}) + EXCLUDE_FROM_ALL ${RAFT_EXCLUDE_FAISS_FROM_ALL} + ENABLE_GPU ${RAFT_FAISS_ENABLE_GPU}) + From f66fd21f80c025515b5f40cebc572d86483697cc Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Tue, 12 Sep 2023 16:43:50 -0400 Subject: [PATCH 07/57] Docs updates --- docs/source/ann_benchmarks_param_tuning.md | 34 ++++++++++++++++++++++ docs/source/raft_ann_benchmarks.md | 13 +++++---- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md index ca8ffa5e18..c59675067a 100644 --- a/docs/source/ann_benchmarks_param_tuning.md +++ b/docs/source/ann_benchmarks_param_tuning.md @@ -5,6 +5,10 @@ This guide outlines the various parameter settings that can be specified in [RAF ## RAFT Indexes +### `raft_brute_force` + +Use RAFT brute-force index for exact search. Brute-force has no further build or search parameters. + ### `raft_ivf_flat` IVF-flat uses an inverted-file index, which partitions the vectors into a series of clusters, or lists, storing them in an interleaved format which is optimized for fast distance computation. The searching of an IVF-flat index reduces the total vectors in the index to those within some user-specified nearest clusters called probes. @@ -52,6 +56,10 @@ CAGRA uses a graph-based index, which creates an intermediate, approximate kNN g ## FAISS Indexes +### `faiss_gpu_flat` + +Use FAISS flat index on the GPU, which performs an exact search using brute-force and doesn't have any further build or search parameters. + ### `faiss_gpu_ivf_flat` IVF-flat uses an inverted-file index, which partitions the vectors into a series of clusters, or lists, storing them in an interleaved format which is optimized for fast distance computation. The searching of an IVF-flat index reduces the total vectors in the index to those within some user-specified nearest clusters called probes. @@ -78,7 +86,33 @@ IVF-pq is an inverted-file index, which partitions the vectors into a series of | `numProbes` | `search_params` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. 
Larger values will improve recall but will search more points in the index. | | `refine_ratio` | `search_params` | N| Positive Number >=0 | 0 | `refine_ratio * k` nearest neighbors are queried from the index initially and an additional refinement step improves recall by selecting only the best `k` neighbors. | +### `faiss_flat` + +Use FAISS flat index on the CPU, which performs an exact search using brute-force and doesn't have any further build or search parameters. + +### `faiss_ivf_flat` + +Use FAISS IVF-Flat index on CPU + +| Parameter | Type | Required | Data Type | Default | Description | +|-----------|----------------|----------|---------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `nlists` | `build_param` | Y | Positive Integer >0 | | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | +| `ratio` | `build_param` | N | Positive Integer >0 | 2 | `1/ratio` is the number of training points which should be used to train the clusters. | +| `nprobe` | `search_params` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | + +### `faiss_ivf_pq` + +Use FAISS IVF-PQ index on CPU +| Parameter | Type | Required | Data Type | Default | Description | +|------------------|----------------|----------|------------------------------------|---------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `nlists` | `build_param` | Y | Positive Integer >0 | | Number of clusters to partition the vectors into. 
Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | +| `ratio` | `build_param` | N | Positive Integer >0 | 2 | `1/ratio` is the number of training points which should be used to train the clusters. | +| `M` | `build_param` | Y | Positive Integer Power of 2 [8-64] | | Number of chunks or subquantizers for each vector. | +| `usePrecomputed` | `build_param` | N | Boolean. Default=`false` | `false` | Use pre-computed lookup tables to speed up search at the cost of increased memory usage. | +| `bitsPerCode` | `build_param` | N | Positive Integer [4-8] | 8 | Number of bits to use for each code. | +| `numProbes` | `search_params` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | +| `refine_ratio` | `search_params` | N| Positive Number >=0 | 0 | `refine_ratio * k` nearest neighbors are queried from the index initially and an additional refinement step improves recall by selecting only the best `k` neighbors. | ## HNSW diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index af0b040d34..4cda258ed3 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -268,12 +268,13 @@ The `index` section will contain a list of index objects, each of which will hav The table below contains the possible settings for the `algo` field. Each unique algorithm will have its own set of `build_param` and `search_params` settings. The [ANN Algorithm Parameter Tuning Guide](ann_benchmarks_param_tuning.md) contains detailed instructions on choosing build and search parameters for each supported algorithm. 
-| Library | Algorithms | -|-----------|----------------------------------------------| -| FAISS | `faiss_gpu_ivf_flat`, `faiss_gpu_ivf_pq` | -| GGNN | `ggnn` | -| HNSWlib | `hnswlib` | -| RAFT | `raft_cagra`, `raft_ivf_flat`, `raft_ivf_pq` | +| Library | Algorithms | +|-----------|-------------------------------------------------------| +| FAISS GPU | `faiss_flat`, `faiss_gpu_ivf_flat`, `faiss_gpu_ivf_pq` | +| FAISS CPU | `faiss_flat`, `faiss_ivf_flat`, `faiss_ivf_pq` | +| GGNN | `ggnn` | +| HNSWlib | `hnswlib` | +| RAFT | `raft_cagra`, `raft_ivf_flat`, `raft_ivf_pq` | From 6d7f390d0a7c62328c89779d3e650c95acd137dc Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 12 Sep 2023 16:45:02 -0400 Subject: [PATCH 08/57] Resetting build all gpu arch to 0 --- build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sh b/build.sh index b2f3c15365..071820ba93 100755 --- a/build.sh +++ b/build.sh @@ -67,7 +67,7 @@ BUILD_DIRS="${LIBRAFT_BUILD_DIR} ${PYLIBRAFT_BUILD_DIR} ${RAFT_DASK_BUILD_DIR}" # Set defaults for vars modified by flags to this script CMAKE_LOG_LEVEL="" VERBOSE_FLAG="" -BUILD_ALL_GPU_ARCH=1 +BUILD_ALL_GPU_ARCH=0 BUILD_TESTS=OFF BUILD_TYPE=Release BUILD_PRIMS_BENCH=OFF From c9569a5e5ab1a71f69b0a64e0ceb271e8431c1de Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Tue, 12 Sep 2023 16:48:44 -0400 Subject: [PATCH 09/57] Doc updates --- cpp/bench/ann/src/raft/raft_benchmark.cu | 2 +- cpp/bench/ann/src/raft/raft_wrapper.h | 2 ++ docs/source/raft_ann_benchmarks.md | 14 +++++++------- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index 7ba381ab0a..f2636e5e7b 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -225,7 +225,7 @@ std::unique_ptr::AnnSearchParam> create_search const std::string& algo, const nlohmann::json& conf) { #ifdef RAFT_ANN_BENCH_USE_RAFT_BFKNN - if (algo == "raft_bfknn") { + if (algo == "raft_brute_force") { auto param = std::make_unique::AnnSearchParam>(); return param; } diff --git a/cpp/bench/ann/src/raft/raft_wrapper.h b/cpp/bench/ann/src/raft/raft_wrapper.h index c8d98460b7..499bdf29a1 100644 --- a/cpp/bench/ann/src/raft/raft_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_wrapper.h @@ -134,6 +134,8 @@ void RaftGpu::search(const T* queries, float* distances, cudaStream_t stream) const { + // TODO: Integrate new `raft::brute_force::index` (from + // https://github.com/rapidsai/raft/pull/1817) raft::spatial::knn::detail::fusedL2Knn(this->dim_, reinterpret_cast(neighbors), distances, diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index d3c3e65f14..132e38984c 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -267,13 +267,13 @@ The `index` section will contain a list of index objects, each of which will hav The table below contains the possible settings for the `algo` field. Each unique algorithm will have its own set of `build_param` and `search_params` settings. The [ANN Algorithm Parameter Tuning Guide](ann_benchmarks_param_tuning.md) contains detailed instructions on choosing build and search parameters for each supported algorithm. 
-| Library | Algorithms | -|-----------|-------------------------------------------------------| -| FAISS GPU | `faiss_flat`, `faiss_gpu_ivf_flat`, `faiss_gpu_ivf_pq` | -| FAISS CPU | `faiss_flat`, `faiss_ivf_flat`, `faiss_ivf_pq` | -| GGNN | `ggnn` | -| HNSWlib | `hnswlib` | -| RAFT | `raft_cagra`, `raft_ivf_flat`, `raft_ivf_pq` | +| Library | Algorithms | +|-----------|-----------------------------------------------------------------| +| FAISS GPU | `faiss_gpu_flat`, `faiss_gpu_ivf_flat`, `faiss_gpu_ivf_pq` | +| FAISS CPU | `faiss_flat`, `faiss_ivf_flat`, `faiss_ivf_pq` | +| GGNN | `ggnn` | +| HNSWlib | `hnswlib` | +| RAFT | `raft_brute_force`, `raft_cagra`, `raft_ivf_flat`, `raft_ivf_pq` | From 28bee2b083747e8c50863fabb2bbd536293bbb5e Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 12 Sep 2023 16:57:57 -0400 Subject: [PATCH 10/57] More updates --- cpp/bench/ann/CMakeLists.txt | 35 +++++++++++++------ cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h | 7 ++++ .../src/raft-ann-bench/run/algos.yaml | 21 +++++++---- 3 files changed, 47 insertions(+), 16 deletions(-) diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index bbceba8bd3..2b6f1c1520 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -15,12 +15,14 @@ # ################################################################################################## # * benchmark options ------------------------------------------------------------------------------ -option(RAFT_ANN_BENCH_USE_FAISS_BFKNN "Include faiss' brute-force knn algorithm in benchmark" ON) +option(RAFT_ANN_BENCH_USE_FAISS_FLAT "Include faiss' brute-force knn algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT "Include faiss' ivf flat algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ "Include faiss' ivf pq algorithm in benchmark" ON) -option(RAFT_ANN_BENCH_USE_FAISS_CPU_BFKNN +option(RAFT_ANN_BENCH_USE_FAISS_CPU_FLAT "Include faiss' cpu brute-force knn algorithm in 
benchmark" ON ) +option(RAFT_ANN_BENCH_USE_FAISS_CPU_FLAT "Include faiss' cpu brute-force algorithm in benchmark" ON) + option(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT "Include faiss' cpu ivf flat algorithm in benchmark" ON ) @@ -41,7 +43,7 @@ find_package(Threads REQUIRED) if(BUILD_CPU_ONLY) set(RAFT_FAISS_ENABLE_GPU OFF) - set(RAFT_ANN_BENCH_USE_FAISS_BFKNN OFF) + set(RAFT_ANN_BENCH_USE_FAISS_FLAT OFF) set(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT OFF) set(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ OFF) set(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT OFF) @@ -53,7 +55,7 @@ else() # https://github.com/rapidsai/raft/issues/1627 if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0.0) set(RAFT_FAISS_ENABLE_GPU OFF) - set(RAFT_ANN_BENCH_USE_FAISS_BFKNN OFF) + set(RAFT_ANN_BENCH_USE_FAISS_FLAT OFF) set(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT OFF) set(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ OFF) else() @@ -62,10 +64,10 @@ else() endif() set(RAFT_ANN_BENCH_USE_FAISS OFF) -if(RAFT_ANN_BENCH_USE_FAISS_BFKNN +if(RAFT_ANN_BENCH_USE_FAISS_FLAT OR RAFT_ANN_BENCH_USE_FAISS_IVFPQ OR RAFT_ANN_BENCH_USE_FAISS_IFFLAT - OR RAFT_ANN_BENCH_USE_FAISS_CPU_BFKNN + OR RAFT_ANN_BENCH_USE_FAISS_CPU_FLAT OR RAFT_ANN_BENCH_USE_FAISS_CPU_IVFPQ OR RAFT_ANN_BENCH_USE_FAISS_CPU_IFFLAT ) @@ -74,6 +76,7 @@ endif() set(RAFT_ANN_BENCH_USE_RAFT OFF) if(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ + OR RAFT_ANN_BENCH_USE_RAFT_BRUTE_FORCE OR RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT OR RAFT_ANN_BENCH_USE_RAFT_CAGRA ) @@ -215,6 +218,12 @@ if(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT) ) endif() +if(RAFT_ANN_BENCH_USE_RAFT_BRUTE_FORCE) + ConfigureAnnBench( + NAME RAFT_BRUTE_FORCE PATH bench/ann/src/raft/raft_benchmark.cu LINKS raft::compiled + ) +endif() + if(RAFT_ANN_BENCH_USE_RAFT_CAGRA) ConfigureAnnBench( NAME @@ -227,9 +236,9 @@ if(RAFT_ANN_BENCH_USE_RAFT_CAGRA) ) endif() -if(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT) +if(RAFT_ANN_BENCH_USE_FAISS_CPU_FLAT) ConfigureAnnBench( - NAME FAISS_IVF_FLAT PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS faiss::faiss + NAME FAISS_CPU_FLAT 
PATH bench/ann/src/faiss/faiss_cpu_benchmark.cpp LINKS faiss::faiss ) endif() @@ -245,14 +254,20 @@ if(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_PQ) ) endif() +if(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT) + ConfigureAnnBench( + NAME FAISS_IVF_FLAT PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS faiss::faiss + ) +endif() + if(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ) ConfigureAnnBench( NAME FAISS_IVF_PQ PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS faiss::faiss ) endif() -if(RAFT_ANN_BENCH_USE_FAISS_BFKNN) - ConfigureAnnBench(NAME FAISS_BFKNN PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS faiss::faiss) +if(RAFT_ANN_BENCH_USE_FAISS_FLAT) + ConfigureAnnBench(NAME FAISS_FLAT PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS faiss::faiss) endif() if(RAFT_ANN_BENCH_USE_GGNN) diff --git a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h index 4ebd22ec0f..05a50c9803 100644 --- a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h +++ b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h @@ -17,7 +17,10 @@ #include "../common/ann_types.hpp" +#ifndef FMT_HEADER_ONLY +#define FMT_HEADER_ONLY_DEF #define FMT_HEADER_ONLY +#endif #include #include @@ -311,3 +314,7 @@ class FaissFlat : public Faiss { }; } // namespace raft::bench::ann + +#ifdef FMT_HEADER_ONLY_DEF +#undef FMT_HEADER_ONLY +#endif \ No newline at end of file diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/algos.yaml b/python/raft-ann-bench/src/raft-ann-bench/run/algos.yaml index 30abe0dda6..247e2cc4a4 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/algos.yaml +++ b/python/raft-ann-bench/src/raft-ann-bench/run/algos.yaml @@ -1,7 +1,7 @@ -faiss_gpu_ivf_flat: - executable: FAISS_IVF_FLAT_ANN_BENCH - requires_gpu: true faiss_gpu_flat: + executable: FAISS_FLAT_ANN_BENCH + requires_gpu: true +faiss_gpu_ivf_flat: executable: FAISS_IVF_FLAT_ANN_BENCH requires_gpu: true faiss_gpu_ivf_pq: @@ -10,9 +10,15 @@ faiss_gpu_ivf_pq: faiss_gpu_ivf_sq: executable: FAISS_IVF_PQ_ANN_BENCH requires_gpu: true 
-faiss_gpu_bfknn: - executable: FAISS_BFKNN_ANN_BENCH - requires_gpu: true +faiss_flat: + executable: FAISS_CPU_FLAT_ANN_BENCH + requires_gpu: false +faiss_ivf_flat: + executable: FAISS_CPU_IVF_FLAT_ANN_BENCH + requires_gpu: false +faiss_ivf_pq: + executable: FAISS_CPU_IVF_PQ_ANN_BENCH + requires_gpu: false raft_ivf_flat: executable: RAFT_IVF_FLAT_ANN_BENCH requires_gpu: true @@ -22,6 +28,9 @@ raft_ivf_pq: raft_cagra: executable: RAFT_CAGRA_ANN_BENCH requires_gpu: true +raft_brute_force: + executable: RAFT_BRUTE_FORCE_ANN_BENCH + requires_gpu: true ggnn: executable: GGNN_ANN_BENCH requires_gpu: true From 1e7ba4ffa05a28a12fbbfd2442995cc628ee8f8a Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Tue, 12 Sep 2023 17:04:12 -0400 Subject: [PATCH 11/57] Cleaning up includes --- cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 25 ++++++++----------- .../ann/src/raft/raft_ivf_flat_wrapper.h | 1 - cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h | 4 +-- 3 files changed, 11 insertions(+), 19 deletions(-) diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index 02aa2ea28b..727a6ed830 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -23,13 +23,11 @@ #include #include #include -#include #include #include #include #include #include -#include #include #include #include @@ -107,19 +105,16 @@ class RaftCagra : public ANN { template void RaftCagra::build(const T* dataset, size_t nrow, cudaStream_t) { - switch (raft::spatial::knn::detail::utils::check_pointer_residency(dataset)) { - case raft::spatial::knn::detail::utils::pointer_residency::host_only: { - auto dataset_view = - raft::make_host_matrix_view(dataset, IdxT(nrow), dimension_); - index_.emplace(raft::neighbors::cagra::build(handle_, index_params_, dataset_view)); - return; - } - default: { - auto dataset_view = - raft::make_device_matrix_view(dataset, IdxT(nrow), dimension_); - 
index_.emplace(raft::neighbors::cagra::build(handle_, index_params_, dataset_view)); - return; - } + if (raft::get_device_for_address(dataset) == -1) { + auto dataset_view = + raft::make_host_matrix_view(dataset, IdxT(nrow), dimension_); + index_.emplace(raft::neighbors::cagra::build(handle_, index_params_, dataset_view)); + return; + } else { + auto dataset_view = + raft::make_device_matrix_view(dataset, IdxT(nrow), dimension_); + index_.emplace(raft::neighbors::cagra::build(handle_, index_params_, dataset_view)); + return; } } diff --git a/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h b/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h index da457e32f1..b6df7de068 100644 --- a/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h b/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h index 8f1e43a706..1b74dcf975 100644 --- a/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_ivf_pq_wrapper.h @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -174,8 +173,7 @@ void RaftIvfPQ::search(const T* queries, raft::runtime::neighbors::ivf_pq::search( handle_, search_params_, *index_, queries_v, candidates.view(), distances_tmp.view()); - if (raft::spatial::knn::detail::utils::check_pointer_residency(dataset_.data_handle()) == - raft::spatial::knn::detail::utils::pointer_residency::device_only) { + if (raft::get_device_for_address(dataset_.data_handle()) >= 0) { auto queries_v = raft::make_device_matrix_view(queries, batch_size, index_->dim()); auto neighbors_v = raft::make_device_matrix_view((IdxT*)neighbors, batch_size, k); From 563b386c210faf5066c8ca292c6d06c928a773e7 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Wed, 13 Sep 2023 15:38:56 -0400 Subject: [PATCH 12/57] Explicitly adding spdlog and fmt --- cpp/bench/ann/CMakeLists.txt | 7 +++++ cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h | 9 ------ cpp/cmake/thirdparty/get_faiss.cmake | 10 ++++--- cpp/cmake/thirdparty/get_fmt.cmake | 22 ++++++++++++++ cpp/cmake/thirdparty/get_spdlog.cmake | 33 +++++++++++++++++++++ 5 files changed, 68 insertions(+), 13 deletions(-) create mode 100644 cpp/cmake/thirdparty/get_fmt.cmake create mode 100644 cpp/cmake/thirdparty/get_spdlog.cmake diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 2b6f1c1520..be2aaae0cd 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -42,6 +42,11 @@ option(RAFT_ANN_BENCH_SINGLE_EXE find_package(Threads REQUIRED) if(BUILD_CPU_ONLY) + + # Include necessary logging dependencies + include(cmake/thirdparty/get_fmt.cmake) + include(cmake/thirdparty/get_spdlog.cmake) + set(RAFT_FAISS_ENABLE_GPU OFF) set(RAFT_ANN_BENCH_USE_FAISS_FLAT OFF) set(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT OFF) @@ -141,6 +146,8 @@ function(ConfigureAnnBench) $ -static-libgcc -static-libstdc++ + $<$:fmt::fmt-header-only> + $<$:spdlog::spdlog_header_only> ) set_target_properties( diff --git a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h index 05a50c9803..56fc9e9f11 100644 --- a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h +++ b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h @@ -16,11 +16,6 @@ #pragma once #include "../common/ann_types.hpp" - -#ifndef FMT_HEADER_ONLY -#define FMT_HEADER_ONLY_DEF -#define FMT_HEADER_ONLY -#endif #include #include @@ -314,7 +309,3 @@ class FaissFlat : public Faiss { }; } // namespace raft::bench::ann - -#ifdef FMT_HEADER_ONLY_DEF -#undef FMT_HEADER_ONLY -#endif \ No newline at end of file diff --git a/cpp/cmake/thirdparty/get_faiss.cmake b/cpp/cmake/thirdparty/get_faiss.cmake index 183dd0007a..b2027ef018 100644 --- a/cpp/cmake/thirdparty/get_faiss.cmake +++ 
b/cpp/cmake/thirdparty/get_faiss.cmake @@ -37,19 +37,21 @@ function(find_and_configure_faiss) GIT_TAG ${PKG_PINNED_TAG} EXCLUDE_FROM_ALL ${PKG_EXCLUDE_FROM_ALL} OPTIONS - "FAISS_ENABLE_PYTHON OFF" - "CUDAToolkit_ROOT ${CUDAToolkit_LIBRARY_DIR}" "FAISS_ENABLE_GPU ${PKG_ENABLE_GPU}" + "FAISS_ENABLE_PYTHON OFF" + "FAISS_OPT_LEVEL avx2" + "FAISS_USE_CUDA_TOOLKIT_STATIC ${CUDA_STATIC_RUNTIME}" "BUILD_TESTING OFF" "CMAKE_MESSAGE_LOG_LEVEL VERBOSE" - "FAISS_USE_CUDA_TOOLKIT_STATIC ${CUDA_STATIC_RUNTIME}" + "CUDAToolkit_ROOT ${CUDAToolkit_LIBRARY_DIR}" ) if(TARGET faiss AND NOT TARGET faiss::faiss) add_library(faiss::faiss ALIAS faiss) endif() - if(faiss_ADDED) + + if(faiss_ADDED) rapids_export(BUILD faiss EXPORT_SET faiss-targets GLOBAL_TARGETS faiss diff --git a/cpp/cmake/thirdparty/get_fmt.cmake b/cpp/cmake/thirdparty/get_fmt.cmake new file mode 100644 index 0000000000..c06f8a78bb --- /dev/null +++ b/cpp/cmake/thirdparty/get_fmt.cmake @@ -0,0 +1,22 @@ +# ============================================================================= +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + +# Use CPM to find or clone fmt +function(find_and_configure_fmt) + + include(${rapids-cmake-dir}/cpm/fmt.cmake) + rapids_cpm_fmt(INSTALL_EXPORT_SET rmm-exports BUILD_EXPORT_SET rmm-exports) +endfunction() + +find_and_configure_fmt() \ No newline at end of file diff --git a/cpp/cmake/thirdparty/get_spdlog.cmake b/cpp/cmake/thirdparty/get_spdlog.cmake new file mode 100644 index 0000000000..7c8f3a4722 --- /dev/null +++ b/cpp/cmake/thirdparty/get_spdlog.cmake @@ -0,0 +1,33 @@ +# ============================================================================= +# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + +# Use CPM to find or clone spdlog +function(find_and_configure_spdlog) + + include(${rapids-cmake-dir}/cpm/spdlog.cmake) + rapids_cpm_spdlog(FMT_OPTION "EXTERNAL_FMT_HO" INSTALL_EXPORT_SET rmm-exports) + rapids_export_package(BUILD spdlog rmm-exports) + + if(spdlog_ADDED) + rapids_export( + BUILD spdlog + EXPORT_SET spdlog + GLOBAL_TARGETS spdlog spdlog_header_only + NAMESPACE spdlog::) + include("${rapids-cmake-dir}/export/find_package_root.cmake") + rapids_export_find_package_root(BUILD spdlog [=[${CMAKE_CURRENT_LIST_DIR}]=] rmm-exports) + endif() +endfunction() + +find_and_configure_spdlog() \ No newline at end of file From 9585f20e6ab3560c70c1d49a7e05512dc26b06ba Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 13 Sep 2023 17:04:00 -0400 Subject: [PATCH 13/57] Using selectors for faiss --- conda/recipes/libraft/build_libraft.sh | 2 +- conda/recipes/libraft/build_libraft_headers.sh | 2 +- conda/recipes/libraft/build_libraft_template.sh | 2 +- conda/recipes/libraft/build_libraft_tests.sh | 2 +- conda/recipes/raft-ann-bench-cpu/meta.yaml | 7 +++++-- conda/recipes/raft-ann-bench/build.sh | 2 +- 6 files changed, 10 insertions(+), 7 deletions(-) diff --git a/conda/recipes/libraft/build_libraft.sh b/conda/recipes/libraft/build_libraft.sh index 71e1533893..7d4173e8bb 100644 --- a/conda/recipes/libraft/build_libraft.sh +++ b/conda/recipes/libraft/build_libraft.sh @@ -1,4 +1,4 @@ #!/usr/bin/env bash # Copyright (c) 2022-2023, NVIDIA CORPORATION. 
-./build.sh libraft -v --allgpuarch --compile-lib --build-metrics=compile_lib --incl-cache-stats --no-nvtx +./build.sh libraft --allgpuarch --compile-lib --build-metrics=compile_lib --incl-cache-stats --no-nvtx diff --git a/conda/recipes/libraft/build_libraft_headers.sh b/conda/recipes/libraft/build_libraft_headers.sh index 330ac92ff3..cc3b840e43 100644 --- a/conda/recipes/libraft/build_libraft_headers.sh +++ b/conda/recipes/libraft/build_libraft_headers.sh @@ -1,4 +1,4 @@ #!/usr/bin/env bash # Copyright (c) 2022-2023, NVIDIA CORPORATION. -./build.sh libraft -v --allgpuarch --no-nvtx +./build.sh libraft --allgpuarch --no-nvtx diff --git a/conda/recipes/libraft/build_libraft_template.sh b/conda/recipes/libraft/build_libraft_template.sh index 974b0a5b58..bd7719af76 100644 --- a/conda/recipes/libraft/build_libraft_template.sh +++ b/conda/recipes/libraft/build_libraft_template.sh @@ -2,4 +2,4 @@ # Copyright (c) 2022-2023, NVIDIA CORPORATION. # Just building template so we verify it uses libraft.so and fail if it doesn't build -./build.sh template -v +./build.sh template diff --git a/conda/recipes/libraft/build_libraft_tests.sh b/conda/recipes/libraft/build_libraft_tests.sh index 08f0d33485..05a2b59eb0 100644 --- a/conda/recipes/libraft/build_libraft_tests.sh +++ b/conda/recipes/libraft/build_libraft_tests.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash # Copyright (c) 2022-2023, NVIDIA CORPORATION. 
-./build.sh tests bench-prims -v --allgpuarch --no-nvtx --build-metrics=tests_bench_prims --incl-cache-stats +./build.sh tests bench-prims --allgpuarch --no-nvtx --build-metrics=tests_bench_prims --incl-cache-stats cmake --install cpp/build --component testing diff --git a/conda/recipes/raft-ann-bench-cpu/meta.yaml b/conda/recipes/raft-ann-bench-cpu/meta.yaml index 06737b0497..769fd0be08 100644 --- a/conda/recipes/raft-ann-bench-cpu/meta.yaml +++ b/conda/recipes/raft-ann-bench-cpu/meta.yaml @@ -47,6 +47,8 @@ requirements: host: - glog {{ glog_version }} - matplotlib + - libfaiss-avx2 {{ faiss_version }} # [linux64] + - libfaiss {{ faiss_version }} # [aarch64] - nlohmann_json {{ nlohmann_json_version }} - python - pyyaml @@ -55,13 +57,14 @@ requirements: run: - glog {{ glog_version }} - h5py {{ h5py_version }} + - libfaiss-avx2 {{ faiss_version }} # [linux64] + - libfaiss {{ faiss_version }} # [aarch64] - matplotlib - python - pyyaml - pandas - benchmark - about: home: https://rapids.ai/ license: Apache-2.0 - summary: libraft ann bench + summary: RAFT ANN CPU benchmarks diff --git a/conda/recipes/raft-ann-bench/build.sh b/conda/recipes/raft-ann-bench/build.sh index 9c411774b6..00078792a1 100644 --- a/conda/recipes/raft-ann-bench/build.sh +++ b/conda/recipes/raft-ann-bench/build.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash # Copyright (c) 2023, NVIDIA CORPORATION. -./build.sh bench-ann -v --allgpuarch --no-nvtx --build-metrics=bench_ann --incl-cache-stats +./build.sh bench-ann --allgpuarch --no-nvtx --build-metrics=bench_ann --incl-cache-stats cmake --install cpp/build --component ann_bench From 87e3be0023548bd608c5c4490a3877b9cfb46fe6 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Wed, 13 Sep 2023 18:03:05 -0400 Subject: [PATCH 14/57] Adding ability to link against faiss avx lib (only if arch supports it) --- .../conda_build_config.yaml | 3 +++ conda/recipes/raft-ann-bench-cpu/meta.yaml | 4 ++-- .../raft-ann-bench/conda_build_config.yaml | 2 +- conda/recipes/raft-ann-bench/meta.yaml | 2 ++ cpp/bench/ann/CMakeLists.txt | 4 +++- cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h | 7 +++--- cpp/cmake/thirdparty/get_faiss.cmake | 24 ++++++++++++++----- 7 files changed, 32 insertions(+), 14 deletions(-) diff --git a/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml b/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml index 0bd424f85b..1f8ac137bf 100644 --- a/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml +++ b/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml @@ -10,6 +10,9 @@ sysroot_version: cmake_version: - ">=3.26.4" +faiss_version: + - "1.7.4" + glog_version: - ">=0.6.0" diff --git a/conda/recipes/raft-ann-bench-cpu/meta.yaml b/conda/recipes/raft-ann-bench-cpu/meta.yaml index 769fd0be08..257793ab9b 100644 --- a/conda/recipes/raft-ann-bench-cpu/meta.yaml +++ b/conda/recipes/raft-ann-bench-cpu/meta.yaml @@ -48,7 +48,7 @@ requirements: - glog {{ glog_version }} - matplotlib - libfaiss-avx2 {{ faiss_version }} # [linux64] - - libfaiss {{ faiss_version }} # [aarch64] + - libfaiss {{ faiss_version }} - nlohmann_json {{ nlohmann_json_version }} - python - pyyaml @@ -58,7 +58,7 @@ requirements: - glog {{ glog_version }} - h5py {{ h5py_version }} - libfaiss-avx2 {{ faiss_version }} # [linux64] - - libfaiss {{ faiss_version }} # [aarch64] + - libfaiss {{ faiss_version }} - matplotlib - python - pyyaml diff --git a/conda/recipes/raft-ann-bench/conda_build_config.yaml b/conda/recipes/raft-ann-bench/conda_build_config.yaml index d156f2609b..335ca82e89 100644 --- a/conda/recipes/raft-ann-bench/conda_build_config.yaml +++ b/conda/recipes/raft-ann-bench/conda_build_config.yaml @@ -26,7 +26,7 @@ glog_version: - ">=0.6.0" 
faiss_version: - - ">=1.7.1" + - "1.7.4" h5py_version: - ">=3.8.0" diff --git a/conda/recipes/raft-ann-bench/meta.yaml b/conda/recipes/raft-ann-bench/meta.yaml index 91d0fdb729..97e297adee 100644 --- a/conda/recipes/raft-ann-bench/meta.yaml +++ b/conda/recipes/raft-ann-bench/meta.yaml @@ -74,6 +74,7 @@ requirements: {% if cuda_major == "11" %} - faiss-proc=*=cuda - libfaiss {{ faiss_version }} + - libfaiss-avx2 {{ faiss_version }} # [linux64] {% endif %} - h5py {{ h5py_version }} - benchmark @@ -96,6 +97,7 @@ requirements: {% if cuda_major == "11" %} - faiss-proc=*=cuda - libfaiss {{ faiss_version }} + - libfaiss-avx2 {{ faiss_version }} # [linux64] {% endif %} - h5py {{ h5py_version }} - benchmark diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index be2aaae0cd..02065cef63 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -138,7 +138,6 @@ function(ConfigureAnnBench) ${BENCH_NAME} PRIVATE raft::raft nlohmann_json::nlohmann_json - $<$:$<$:NCCL::NCCL>> ${ConfigureAnnBench_LINKS} Threads::Threads $<$:${RAFT_CTK_MATH_DEPENDENCIES}> @@ -246,18 +245,21 @@ endif() if(RAFT_ANN_BENCH_USE_FAISS_CPU_FLAT) ConfigureAnnBench( NAME FAISS_CPU_FLAT PATH bench/ann/src/faiss/faiss_cpu_benchmark.cpp LINKS faiss::faiss + faiss::faiss_avx2 ) endif() if(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT) ConfigureAnnBench( NAME FAISS_CPU_IVF_FLAT PATH bench/ann/src/faiss/faiss_cpu_benchmark.cpp LINKS faiss::faiss + faiss::faiss_avx2 ) endif() if(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_PQ) ConfigureAnnBench( NAME FAISS_CPU_IVF_PQ PATH bench/ann/src/faiss/faiss_cpu_benchmark.cpp LINKS faiss::faiss + faiss::faiss_avx2 ) endif() diff --git a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h index 56fc9e9f11..a96d314e2a 100644 --- a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h +++ b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h @@ -192,10 +192,9 @@ void Faiss::search(const T* queries, float* distances, cudaStream_t 
stream) const { - static_assert(sizeof(size_t) == sizeof(faiss::Index::idx_t), - "sizes of size_t and faiss::Index::idx_t are different"); - index_->search( - batch_size, queries, k, distances, reinterpret_cast(neighbors)); + static_assert(sizeof(size_t) == sizeof(faiss::idx_t), + "sizes of size_t and faiss::idx_t are different"); + index_->search(batch_size, queries, k, distances, reinterpret_cast(neighbors)); } template diff --git a/cpp/cmake/thirdparty/get_faiss.cmake b/cpp/cmake/thirdparty/get_faiss.cmake index b2027ef018..eef68c6d60 100644 --- a/cpp/cmake/thirdparty/get_faiss.cmake +++ b/cpp/cmake/thirdparty/get_faiss.cmake @@ -30,8 +30,16 @@ function(find_and_configure_faiss) set(CPM_DOWNLOAD_faiss ON) endif() + include(cmake/modules/FindAVX.cmake) + + # Link against AVX CPU lib if it exists + set(RAFT_FAISS_OPT_LEVEL "generic") + if(CXX_AVX_FOUND) + set(RAFT_FAISS_OPT_LEVEL "avx2") + endif() + rapids_cpm_find(faiss ${PKG_VERSION} - GLOBAL_TARGETS faiss::faiss + GLOBAL_TARGETS faiss::faiss faiss::faiss_avx2 CPM_ARGS GIT_REPOSITORY ${PKG_REPOSITORY} GIT_TAG ${PKG_PINNED_TAG} @@ -39,7 +47,7 @@ function(find_and_configure_faiss) OPTIONS "FAISS_ENABLE_GPU ${PKG_ENABLE_GPU}" "FAISS_ENABLE_PYTHON OFF" - "FAISS_OPT_LEVEL avx2" + "FAISS_OPT_LEVEL ${RAFT_FAISS_OPT_LEVEL}" "FAISS_USE_CUDA_TOOLKIT_STATIC ${CUDA_STATIC_RUNTIME}" "BUILD_TESTING OFF" "CMAKE_MESSAGE_LOG_LEVEL VERBOSE" @@ -50,18 +58,22 @@ function(find_and_configure_faiss) add_library(faiss::faiss ALIAS faiss) endif() + if(TARGET faiss_avx2 AND NOT TARGET faiss::faiss_avx2) + add_library(faiss::faiss_avx2 ALIAS faiss_avx2) + endif() + if(faiss_ADDED) rapids_export(BUILD faiss EXPORT_SET faiss-targets - GLOBAL_TARGETS faiss + GLOBAL_TARGETS faiss faiss_avx2 NAMESPACE faiss::) endif() # We generate the faiss-config files when we built faiss locally, so always do `find_dependency` rapids_export_package(BUILD OpenMP raft-ann-bench-exports) # faiss uses openMP but doesn't export a need for it - 
rapids_export_package(BUILD faiss raft-ann-bench-exports GLOBAL_TARGETS faiss::faiss faiss) - rapids_export_package(INSTALL faiss raft-ann-bench-exports GLOBAL_TARGETS faiss::faiss faiss) + rapids_export_package(BUILD faiss raft-ann-bench-exports GLOBAL_TARGETS faiss::faiss faiss faiss::faiss_avx2 faiss_avx2) + rapids_export_package(INSTALL faiss raft-ann-bench-exports GLOBAL_TARGETS faiss::faiss faiss faiss::faiss_avx2 faiss_avx2) # Tell cmake where it can find the generated faiss-config.cmake we wrote. include("${rapids-cmake-dir}/export/find_package_root.cmake") @@ -82,7 +94,7 @@ if(NOT RAFT_FAISS_GIT_REPOSITORY) # set(RAFT_FAISS_GIT_REPOSITORY https://github.com/facebookresearch/faiss.git) endif() -find_and_configure_faiss(VERSION 1.7.0 +find_and_configure_faiss(VERSION 1.7.4 REPOSITORY ${RAFT_FAISS_GIT_REPOSITORY} PINNED_TAG ${RAFT_FAISS_GIT_TAG} BUILD_STATIC_LIBS ${RAFT_USE_FAISS_STATIC} From 74e6a5ddf6bc72d841075a1eea959345552698ab Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 13 Sep 2023 18:03:40 -0400 Subject: [PATCH 15/57] Removing some legacy get_faiss cmake bits --- cpp/cmake/thirdparty/get_faiss.cmake | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/cpp/cmake/thirdparty/get_faiss.cmake b/cpp/cmake/thirdparty/get_faiss.cmake index eef68c6d60..d245716994 100644 --- a/cpp/cmake/thirdparty/get_faiss.cmake +++ b/cpp/cmake/thirdparty/get_faiss.cmake @@ -80,20 +80,6 @@ function(find_and_configure_faiss) rapids_export_find_package_root(BUILD faiss [=[${CMAKE_CURRENT_LIST_DIR}]=] raft-ann-bench-exports) endfunction() -if(NOT RAFT_FAISS_GIT_TAG) - # TODO: Remove this once faiss supports FAISS_USE_CUDA_TOOLKIT_STATIC - # (https://github.com/facebookresearch/faiss/pull/2446) - set(RAFT_FAISS_GIT_TAG fea/statically-link-ctk-v1.7.0) - # set(RAFT_FAISS_GIT_TAG bde7c0027191f29c9dadafe4f6e68ca0ee31fb30) -endif() - -if(NOT RAFT_FAISS_GIT_REPOSITORY) - # TODO: Remove this once faiss supports FAISS_USE_CUDA_TOOLKIT_STATIC - # 
(https://github.com/facebookresearch/faiss/pull/2446) - set(RAFT_FAISS_GIT_REPOSITORY https://github.com/trxcllnt/faiss.git) - # set(RAFT_FAISS_GIT_REPOSITORY https://github.com/facebookresearch/faiss.git) -endif() - find_and_configure_faiss(VERSION 1.7.4 REPOSITORY ${RAFT_FAISS_GIT_REPOSITORY} PINNED_TAG ${RAFT_FAISS_GIT_TAG} From fcd029fead1e294f5c8a282197726133b770ed05 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 13 Sep 2023 19:07:32 -0400 Subject: [PATCH 16/57] Updating faiss cpu to override search params --- cpp/bench/ann/src/faiss/faiss_wrapper.h | 90 +++++++++++++++++++------ 1 file changed, 69 insertions(+), 21 deletions(-) diff --git a/cpp/bench/ann/src/faiss/faiss_wrapper.h b/cpp/bench/ann/src/faiss/faiss_wrapper.h index a426ad2df7..d98f1d347c 100644 --- a/cpp/bench/ann/src/faiss/faiss_wrapper.h +++ b/cpp/bench/ann/src/faiss/faiss_wrapper.h @@ -105,7 +105,7 @@ class FaissGpu : public ANN { void build(const T* dataset, size_t nrow, cudaStream_t stream = 0) final; - void set_search_param(const AnnSearchParam& param) override; + virtual void set_search_param(const AnnSearchParam& param) {} // TODO: if the number of results is less than k, the remaining elements of 'neighbors' // will be filled with (size_t)-1 @@ -147,6 +147,7 @@ class FaissGpu : public ANN { cudaEvent_t sync_{nullptr}; cudaStream_t faiss_default_stream_{nullptr}; double training_sample_fraction_; + std::unique_ptr search_params_; }; template @@ -181,20 +182,6 @@ void FaissGpu::build(const T* dataset, size_t nrow, cudaStream_t stream) stream_wait(stream); } -template -void FaissGpu::set_search_param(const AnnSearchParam& param) -{ - auto search_param = dynamic_cast(param); - int nprobe = search_param.nprobe; - assert(nprobe <= nlist_); - dynamic_cast(index_.get())->setNumProbes(nprobe); - - if (search_param.refine_ratio > 1.0) { - this->index_refine_ = std::make_unique(this->index_.get()); - this->index_refine_.get()->k_factor = search_param.refine_ratio; - } -} - template void 
FaissGpu::search(const T* queries, int batch_size, @@ -203,10 +190,14 @@ void FaissGpu::search(const T* queries, float* distances, cudaStream_t stream) const { - static_assert(sizeof(size_t) == sizeof(faiss::Index::idx_t), - "sizes of size_t and faiss::Index::idx_t are different"); - index_->search( - batch_size, queries, k, distances, reinterpret_cast(neighbors)); + static_assert(sizeof(size_t) == sizeof(faiss::idx_t), + "sizes of size_t and faiss::idx_t are different"); + index_->search(batch_size, + queries, + k, + distances, + reinterpret_cast(neighbors), + search_params_.get()); stream_wait(stream); } @@ -245,6 +236,22 @@ class FaissGpuIVFFlat : public FaissGpu { &(this->gpu_resource_), dim, param.nlist, this->metric_type_, config); } + void set_search_param(const typename FaissGpu::AnnSearchParam& param) override + { + auto search_param = dynamic_cast::SearchParam&>(param); + int nprobe = search_param.nprobe; + assert(nprobe <= nlist_); + + faiss::IVFSearchParameters faiss_search_params; + faiss_search_params.nprobe = nprobe; + this->search_params_ = std::make_unique(faiss_search_params); + + if (search_param.refine_ratio > 1.0) { + this->index_refine_ = std::make_unique(this->index_.get()); + this->index_refine_.get()->k_factor = search_param.refine_ratio; + } + } + void save(const std::string& file) const override { this->template save_(file); @@ -280,6 +287,23 @@ class FaissGpuIVFPQ : public FaissGpu { config); } + void set_search_param(const typename FaissGpu::AnnSearchParam& param) override + { + auto search_param = dynamic_cast::SearchParam&>(param); + int nprobe = search_param.nprobe; + assert(nprobe <= nlist_); + + faiss::IVFPQSearchParameters faiss_search_params; + faiss_search_params.nprobe = nprobe; + + this->search_params_ = std::make_unique(faiss_search_params); + + if (search_param.refine_ratio > 1.0) { + this->index_refine_ = std::make_unique(this->index_.get()); + this->index_refine_.get()->k_factor = search_param.refine_ratio; + } + } + void 
save(const std::string& file) const override { this->template save_(file); @@ -293,6 +317,8 @@ class FaissGpuIVFPQ : public FaissGpu { template class FaissGpuIVFSQ : public FaissGpu { public: + using typename FaissGpu::AnnSearchParam; + using typename FaissGpu::SearchParam; struct BuildParam : public FaissGpu::BuildParam { std::string quantizer_type; }; @@ -315,6 +341,23 @@ class FaissGpuIVFSQ : public FaissGpu { &(this->gpu_resource_), dim, param.nlist, qtype, this->metric_type_, true, config); } + virtual void set_search_param(const typename FaissGpu::AnnSearchParam& param) override + { + auto search_param = dynamic_cast(param); + int nprobe = search_param.nprobe; + assert(nprobe <= nlist_); + + faiss::IVFSearchParameters faiss_search_params; + faiss_search_params.nprobe = nprobe; + + this->search_params_ = std::make_unique(faiss_search_params); + + if (search_param.refine_ratio > 1.0) { + this->index_refine_ = std::make_unique(this->index_.get()); + this->index_refine_.get()->k_factor = search_param.refine_ratio; + } + } + void save(const std::string& file) const override { this->template save_( @@ -339,9 +382,14 @@ class FaissGpuFlat : public FaissGpu { &(this->gpu_resource_), dim, this->metric_type_, config); } - // class FaissGpu is more like a IVF class, so need special treating here - void set_search_param(const typename ANN::AnnSearchParam&) override{}; + void set_search_param(const typename FaissGpu::AnnSearchParam& param) override + { + auto search_param = dynamic_cast::SearchParam&>(param); + int nprobe = search_param.nprobe; + assert(nprobe <= nlist_); + this->search_params_ = std::make_unique(); + } void save(const std::string& file) const override { this->template save_(file); From a56227e25a583dc911cc82200bff8d7dcebd3bd6 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 13 Sep 2023 20:39:37 -0400 Subject: [PATCH 17/57] Trying again. 
--- cpp/bench/ann/CMakeLists.txt | 25 ++++++++++++++++--------- cpp/cmake/thirdparty/get_faiss.cmake | 21 +++++++++++++++------ 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 02065cef63..7017910c24 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -242,41 +242,48 @@ if(RAFT_ANN_BENCH_USE_RAFT_CAGRA) ) endif() +set(RAFT_FAISS_TARGETS faiss::faiss) +if(CXX_AVX_FOUND) + list(APPEND RAFT_FAISS_TARGETS faiss::faiss_avx2) +endif() + if(RAFT_ANN_BENCH_USE_FAISS_CPU_FLAT) ConfigureAnnBench( - NAME FAISS_CPU_FLAT PATH bench/ann/src/faiss/faiss_cpu_benchmark.cpp LINKS faiss::faiss - faiss::faiss_avx2 + NAME FAISS_CPU_FLAT PATH bench/ann/src/faiss/faiss_cpu_benchmark.cpp LINKS + ${RAFT_FAISS_TARGETS} ) endif() if(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT) ConfigureAnnBench( - NAME FAISS_CPU_IVF_FLAT PATH bench/ann/src/faiss/faiss_cpu_benchmark.cpp LINKS faiss::faiss - faiss::faiss_avx2 + NAME FAISS_CPU_IVF_FLAT PATH bench/ann/src/faiss/faiss_cpu_benchmark.cpp LINKS + ${RAFT_FAISS_TARGETS} ) endif() if(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_PQ) ConfigureAnnBench( - NAME FAISS_CPU_IVF_PQ PATH bench/ann/src/faiss/faiss_cpu_benchmark.cpp LINKS faiss::faiss - faiss::faiss_avx2 + NAME FAISS_CPU_IVF_PQ PATH bench/ann/src/faiss/faiss_cpu_benchmark.cpp LINKS + ${RAFT_FAISS_TARGETS} ) endif() if(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT) ConfigureAnnBench( - NAME FAISS_IVF_FLAT PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS faiss::faiss + NAME FAISS_IVF_FLAT PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS ${RAFT_FAISS_TARGETS} ) endif() if(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ) ConfigureAnnBench( - NAME FAISS_IVF_PQ PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS faiss::faiss + NAME FAISS_IVF_PQ PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS ${RAFT_FAISS_TARGETS} ) endif() if(RAFT_ANN_BENCH_USE_FAISS_FLAT) - ConfigureAnnBench(NAME FAISS_FLAT PATH bench/ann/src/faiss/faiss_benchmark.cu 
LINKS faiss::faiss) + ConfigureAnnBench( + NAME FAISS_FLAT PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS ${RAFT_FAISS_TARGETS} + ) endif() if(RAFT_ANN_BENCH_USE_GGNN) diff --git a/cpp/cmake/thirdparty/get_faiss.cmake b/cpp/cmake/thirdparty/get_faiss.cmake index d245716994..bb7dc1eb5c 100644 --- a/cpp/cmake/thirdparty/get_faiss.cmake +++ b/cpp/cmake/thirdparty/get_faiss.cmake @@ -33,13 +33,19 @@ function(find_and_configure_faiss) include(cmake/modules/FindAVX.cmake) # Link against AVX CPU lib if it exists + set(RAFT_FAISS_GLOBAL_TARGETS faiss::faiss) + set(RAFT_FAISS_EXPORT_GLOBAL_TARGETS faiss) set(RAFT_FAISS_OPT_LEVEL "generic") if(CXX_AVX_FOUND) set(RAFT_FAISS_OPT_LEVEL "avx2") + list(APPEND RAFT_FAISS_GLOBAL_TARGETS faiss::faiss_avx2) + list(APPEND RAFT_FAISS_EXPORT_GLOBAL_TARGETS faiss_avx2) endif() + + rapids_cpm_find(faiss ${PKG_VERSION} - GLOBAL_TARGETS faiss::faiss faiss::faiss_avx2 + GLOBAL_TARGETS ${RAFT_FAISS_GLOBAL_TARGETS} CPM_ARGS GIT_REPOSITORY ${PKG_REPOSITORY} GIT_TAG ${PKG_PINNED_TAG} @@ -58,22 +64,25 @@ function(find_and_configure_faiss) add_library(faiss::faiss ALIAS faiss) endif() - if(TARGET faiss_avx2 AND NOT TARGET faiss::faiss_avx2) - add_library(faiss::faiss_avx2 ALIAS faiss_avx2) + if(CXX_AVX_FOUND) + + if(TARGET faiss_avx2 AND NOT TARGET faiss::faiss_avx2) + add_library(faiss::faiss_avx2 ALIAS faiss_avx2) + endif() endif() if(faiss_ADDED) rapids_export(BUILD faiss EXPORT_SET faiss-targets - GLOBAL_TARGETS faiss faiss_avx2 + GLOBAL_TARGETS ${RAFT_FAISS_EXPORT_GLOBAL_TARGETS} NAMESPACE faiss::) endif() # We generate the faiss-config files when we built faiss locally, so always do `find_dependency` rapids_export_package(BUILD OpenMP raft-ann-bench-exports) # faiss uses openMP but doesn't export a need for it - rapids_export_package(BUILD faiss raft-ann-bench-exports GLOBAL_TARGETS faiss::faiss faiss faiss::faiss_avx2 faiss_avx2) - rapids_export_package(INSTALL faiss raft-ann-bench-exports GLOBAL_TARGETS faiss::faiss faiss 
faiss::faiss_avx2 faiss_avx2) + rapids_export_package(BUILD faiss raft-ann-bench-exports GLOBAL_TARGETS ${RAFT_FAISS_GLOBAL_TARGETS} ${RAFT_FAISS_EXPORT_GLOBAL_TARGETS}) + rapids_export_package(INSTALL faiss raft-ann-bench-exports GLOBAL_TARGETS ${RAFT_FAISS_GLOBAL_TARGETS} ${RAFT_FAISS_EXPORT_GLOBAL_TARGETS}) # Tell cmake where it can find the generated faiss-config.cmake we wrote. include("${rapids-cmake-dir}/export/find_package_root.cmake") From 3fcd1e92f1f655a959f61490d62e518f52cb6b08 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 13 Sep 2023 20:40:31 -0400 Subject: [PATCH 18/57] Making libfaiss installs either or --- conda/recipes/raft-ann-bench-cpu/meta.yaml | 4 ++-- conda/recipes/raft-ann-bench/meta.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/conda/recipes/raft-ann-bench-cpu/meta.yaml b/conda/recipes/raft-ann-bench-cpu/meta.yaml index 257793ab9b..769fd0be08 100644 --- a/conda/recipes/raft-ann-bench-cpu/meta.yaml +++ b/conda/recipes/raft-ann-bench-cpu/meta.yaml @@ -48,7 +48,7 @@ requirements: - glog {{ glog_version }} - matplotlib - libfaiss-avx2 {{ faiss_version }} # [linux64] - - libfaiss {{ faiss_version }} + - libfaiss {{ faiss_version }} # [aarch64] - nlohmann_json {{ nlohmann_json_version }} - python - pyyaml @@ -58,7 +58,7 @@ requirements: - glog {{ glog_version }} - h5py {{ h5py_version }} - libfaiss-avx2 {{ faiss_version }} # [linux64] - - libfaiss {{ faiss_version }} + - libfaiss {{ faiss_version }} # [aarch64] - matplotlib - python - pyyaml diff --git a/conda/recipes/raft-ann-bench/meta.yaml b/conda/recipes/raft-ann-bench/meta.yaml index 97e297adee..b86fc79e14 100644 --- a/conda/recipes/raft-ann-bench/meta.yaml +++ b/conda/recipes/raft-ann-bench/meta.yaml @@ -73,7 +73,7 @@ requirements: # Temporarily ignore faiss benchmarks on CUDA 12 because packages do not exist yet {% if cuda_major == "11" %} - faiss-proc=*=cuda - - libfaiss {{ faiss_version }} + - libfaiss {{ faiss_version }} # [aarch64] - 
libfaiss-avx2 {{ faiss_version }} # [linux64] {% endif %} - h5py {{ h5py_version }} @@ -96,7 +96,7 @@ requirements: # Temporarily ignore faiss benchmarks on CUDA 12 because packages do not exist yet {% if cuda_major == "11" %} - faiss-proc=*=cuda - - libfaiss {{ faiss_version }} + - libfaiss {{ faiss_version }} # [aarch64] - libfaiss-avx2 {{ faiss_version }} # [linux64] {% endif %} - h5py {{ h5py_version }} From 929005b5fc915be2c898930b93da3fb395d6da8e Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Mon, 25 Sep 2023 00:16:27 +0200 Subject: [PATCH 19/57] Enable orig CAGRA benchmark --- cpp/bench/ann/CMakeLists.txt | 8 +- cpp/bench/ann/src/common/benchmark.hpp | 2 +- cpp/bench/ann/src/raft/orig_cagra_wrapper.h | 83 ++------------ cpp/bench/ann/src/raft/raft_benchmark.cu | 108 +++++++++--------- cpp/include/raft/neighbors/cagra_types.hpp | 6 +- .../raft-ann-bench/run/conf/deep-100M.json | 4 +- 6 files changed, 74 insertions(+), 137 deletions(-) diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index c59f0ed0d3..a9505ebc3d 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -208,14 +208,14 @@ if(RAFT_ANN_BENCH_USE_RAFT_CAGRA) PATH bench/ann/src/raft/raft_benchmark.cu $<$:bench/ann/src/raft/raft_cagra.cu> - # $<$:bench/ann/src/raft/orig_cagra.cu> + $<$:bench/ann/src/raft/orig_cagra.cu> LINKS raft::compiled ) - # target_compile_options(RAFT_CAGRA_ANN_BENCH PUBLIC -I/workspace/rapids/knn/cagra/include) - # target_link_options(RAFT_CAGRA_ANN_BENCH PUBLIC -L/workspace/rapids/knn/cagra/lib) + target_compile_options(RAFT_CAGRA_ANN_BENCH PUBLIC -I/workspace/rapids/knn/cagra/include) + target_link_options(RAFT_CAGRA_ANN_BENCH PUBLIC -L/workspace/rapids/knn/cagra/lib) - # target_link_libraries(RAFT_CAGRA_ANN_BENCH PUBLIC cagra) + target_link_libraries(RAFT_CAGRA_ANN_BENCH PUBLIC cagra) endif() if(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT) diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp 
index 4ec977700d..ab22b9715e 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -200,7 +200,6 @@ void bench_search(::benchmark::State& state, } catch (const std::exception& e) { return state.SkipWithError("Failed to create an algo: " + std::string(e.what())); } - algo->set_search_param(*search_param); const auto algo_property = parse_algo_property(algo->get_preference(), sp_json); const T* query_set = dataset->query_set(algo_property.query_memory_type); @@ -218,6 +217,7 @@ void bench_search(::benchmark::State& state, return; } } + algo->set_search_param(*search_param); std::ptrdiff_t batch_offset = 0; std::size_t queries_processed = 0; diff --git a/cpp/bench/ann/src/raft/orig_cagra_wrapper.h b/cpp/bench/ann/src/raft/orig_cagra_wrapper.h index 7a8eef461f..049a7488b7 100644 --- a/cpp/bench/ann/src/raft/orig_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/orig_cagra_wrapper.h @@ -64,6 +64,7 @@ class Cagra : public ANN { using typename ANN::AnnSearchParam; struct SearchParam : public AnnSearchParam { raft::neighbors::experimental::cagra::search_params p; + auto needs_dataset() const -> bool override { return true; } std::string search_mode; // "single-cta", "multi-cta", or "multi-kernel" int batch_size; int k; @@ -85,18 +86,18 @@ class Cagra : public ANN { float* distances, cudaStream_t stream = 0) const override; - void save(const std::string& file) const override; - void load(const std::string& file) override; - - AlgoProperty get_property() const override + // to enable dataset access from GPU memory + AlgoProperty get_preference() const override { AlgoProperty property; - property.dataset_memory_type = MemoryType::Device; - property.query_memory_type = MemoryType::Device; - property.need_dataset_when_search = true; + property.dataset_memory_type = MemoryType::Device; + property.query_memory_type = MemoryType::Device; return property; } + void save(const std::string& file) const override; + void load(const std::string& 
file) override; + void set_search_dataset(const T* dataset, size_t nrow) override { // std::cout << "Cagra set_search_dataset" << std::endl; @@ -139,52 +140,6 @@ void Cagra::build(const T*, size_t, cudaStream_t) throw std::runtime_error("Cagra's build() is not available now, use its tools to build index"); } -// // from cagra/tools/cagra_search.cu -// template -// void Cagra::check_search_param_(SearchParam& param) -// { -// if (param.search_mode != "single-cta" && param.search_mode != "multi-cta" && -// param.search_mode != "multi-kernel") { -// throw std::runtime_error("Cagra: illegal search_mode: '" + param.search_mode + "'"); -// } - -// if (param.team_size != 0 && param.team_size != 4 && param.team_size != 8 && -// param.team_size != 16 && param.team_size != 32) { -// throw std::runtime_error("Cagra: team_size must be 0, 4, 8, 16 or 32. " + -// std::to_string(param.team_size) + " has been given.\n"); -// } - -// if (param.internal_k < static_cast(param.k)) { -// throw std::runtime_error("Cagra: internal_k must >= k"); -// } -// if (param.internal_k % 32) { -// throw std::runtime_error("Cagra: internal_k must be multiple of 32"); -// } -// if (param.internal_k > 1024 && param.search_mode != "multi-cta") { -// throw std::runtime_error("Cagra: internal_k must <= 1024 unless in multi-cta mode"); -// } - -// if (param.max_iterations == 0) { -// if (param.search_mode == "multi-cta") { -// param.max_iterations = 1 + std::min(32 * 1.1, 32 + 10.0); -// } else { -// param.max_iterations = 1 + std::min((param.internal_k / param.search_width) * 1.1, -// (param.internal_k / param.search_width) + 10.0); -// } -// } -// if (param.max_iterations < param.min_iterations) { param.max_iterations = param.min_iterations; -// } - -// if (param.search_mode == "multi-cta") { -// int mc_num_cta_per_query = std::max(param.search_width, param.internal_k / 32); -// if (mc_num_cta_per_query * 32 < param.k) { -// throw std::runtime_error("mc_num_cta_per_query (" + 
std::to_string(mc_num_cta_per_query) + -// ") * 32 must be >= k (" + std::to_string(param.k) + -// ") when search_mode is multi-cta"); -// } -// } -// } - template void Cagra::set_search_param(const AnnSearchParam& param) { @@ -193,29 +148,12 @@ void Cagra::set_search_param(const AnnSearchParam& param) if (!graph_ || degree_ == 0) { throw std::runtime_error("Cagra: index is not loaded"); } auto new_search_param = dynamic_cast(param); - // check_search_param_(new_search_param); - // if (new_search_param.search_mode != search_param_.search_mode || - // new_search_param.batch_size != search_param_.batch_size || - // new_search_param.k != search_param_.k || - // new_search_param.team_size != search_param_.team_size || - // new_search_param.internal_k != search_param_.internal_k || - // new_search_param.search_width != search_param_.search_width || - // new_search_param.min_iterations != search_param_.min_iterations || - // new_search_param.max_iterations != search_param_.max_iterations) { - - if (plan_) { - // std::cout << "Cagra destroying plan" << std::endl; - - destroy_plan(plan_); - } - - // if (new_search_param.batch_size != search_param_.batch_size || - // new_search_param.k != search_param_.k) { + if (plan_) { destroy_plan(plan_); } if (tmp_neighbors_) RAFT_CUDA_TRY(cudaFree(tmp_neighbors_)); RAFT_CUDA_TRY( cudaMalloc(&tmp_neighbors_, sizeof(size_t) * new_search_param.batch_size * new_search_param.k)); - // } + search_param_ = new_search_param; // std::cout << "Cagra creating new plan" << std::endl; create_plan(&plan_, @@ -239,7 +177,6 @@ void Cagra::set_search_param(const AnnSearchParam& param) dataset_, graph_); } -//} template void Cagra::search(const T* queries, diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index e004464c08..4715dbb838 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -45,10 +45,10 @@ extern template class raft::bench::ann::RaftIvfPQ; 
extern template class raft::bench::ann::RaftCagra; extern template class raft::bench::ann::RaftCagra; extern template class raft::bench::ann::RaftCagra; -// #include "orig_cagra_wrapper.h" -// extern template class raft::bench::ann::Cagra; -// extern template class raft::bench::ann::Cagra; -// extern template class raft::bench::ann::Cagra; +#include "orig_cagra_wrapper.h" +extern template class raft::bench::ann::Cagra; +extern template class raft::bench::ann::Cagra; +extern template class raft::bench::ann::Cagra; #endif #define JSON_DIAGNOSTICS 1 #include @@ -175,46 +175,46 @@ void parse_search_param(const nlohmann::json& conf, } } } -// template -// void parse_build_param(const nlohmann::json& conf, -// typename raft::bench::ann::Cagra::BuildParam& param) -// { -// } -// template -// void parse_search_param(const nlohmann::json& conf, -// typename raft::bench::ann::Cagra::SearchParam& param) -// { -// if (conf.contains("itopk")) { param.p.itopk_size = conf.at("itopk"); } -// if (conf.contains("search_width")) { param.p.search_width = conf.at("search_width"); } -// if (conf.contains("max_iterations")) { param.p.max_iterations = conf.at("max_iterations"); } -// if (conf.contains("algo")) { -// if (conf.at("algo") == "single_cta") { -// param.p.algo = raft::neighbors::experimental::cagra::search_algo::SINGLE_CTA; -// param.search_mode = "single-cta"; -// } else if (conf.at("algo") == "multi_cta") { -// param.p.algo = raft::neighbors::experimental::cagra::search_algo::MULTI_CTA; -// param.search_mode = "multi-cta"; -// } else if (conf.at("algo") == "multi_kernel") { -// param.p.algo = raft::neighbors::experimental::cagra::search_algo::MULTI_KERNEL; -// param.search_mode = "multi-kernel"; -// } else if (conf.at("algo") == "auto") { -// param.p.algo = raft::neighbors::experimental::cagra::search_algo::AUTO; -// } else { -// std::string tmp = conf.at("algo"); -// THROW("Invalid value for algo: %s", tmp.c_str()); -// } -// } -// if (conf.contains("k")) { -// param.k = 
conf.at("k"); -// } else { -// param.k = 10; -// } -// if (conf.contains("batch_size")) { -// param.batch_size = conf.at("batch_size"); -// } else { -// param.batch_size = 10000; -// }; -// } +template +void parse_build_param(const nlohmann::json& conf, + typename raft::bench::ann::Cagra::BuildParam& param) +{ +} +template +void parse_search_param(const nlohmann::json& conf, + typename raft::bench::ann::Cagra::SearchParam& param) +{ + if (conf.contains("itopk")) { param.p.itopk_size = conf.at("itopk"); } + if (conf.contains("search_width")) { param.p.search_width = conf.at("search_width"); } + if (conf.contains("max_iterations")) { param.p.max_iterations = conf.at("max_iterations"); } + if (conf.contains("algo")) { + if (conf.at("algo") == "single_cta") { + param.p.algo = raft::neighbors::experimental::cagra::search_algo::SINGLE_CTA; + param.search_mode = "single-cta"; + } else if (conf.at("algo") == "multi_cta") { + param.p.algo = raft::neighbors::experimental::cagra::search_algo::MULTI_CTA; + param.search_mode = "multi-cta"; + } else if (conf.at("algo") == "multi_kernel") { + param.p.algo = raft::neighbors::experimental::cagra::search_algo::MULTI_KERNEL; + param.search_mode = "multi-kernel"; + } else if (conf.at("algo") == "auto") { + param.p.algo = raft::neighbors::experimental::cagra::search_algo::AUTO; + } else { + std::string tmp = conf.at("algo"); + THROW("Invalid value for algo: %s", tmp.c_str()); + } + } + if (conf.contains("k")) { + param.k = conf.at("k"); + } else { + param.k = 10; + } + if (conf.contains("batch_size")) { + param.batch_size = conf.at("batch_size"); + } else { + param.batch_size = 10000; + }; +} #endif template @@ -258,11 +258,11 @@ std::unique_ptr> create_algo(const std::string& algo, parse_build_param(conf, param); ann = std::make_unique>(metric, dim, param); } - // if (algo == "cagra") { - // typename raft::bench::ann::Cagra::BuildParam param; - // parse_build_param(conf, param); - // ann = std::make_unique>(metric, dim, param); - // } 
+ if (algo == "cagra") { + typename raft::bench::ann::Cagra::BuildParam param; + // parse_build_param(conf, param); + ann = std::make_unique>(metric, dim, param); + } #endif if (!ann) { throw std::runtime_error("invalid algo: '" + algo + "'"); } @@ -300,11 +300,11 @@ std::unique_ptr::AnnSearchParam> create_search parse_search_param(conf, *param); return param; } - // if (algo == "cagra") { - // auto param = std::make_unique::SearchParam>(); - // parse_search_param(conf, *param); - // return param; - // } + if (algo == "cagra") { + auto param = std::make_unique::SearchParam>(); + parse_search_param(conf, *param); + return param; + } #endif // else throw std::runtime_error("invalid algo: '" + algo + "'"); diff --git a/cpp/include/raft/neighbors/cagra_types.hpp b/cpp/include/raft/neighbors/cagra_types.hpp index 11e2233655..3ea3fd57a1 100644 --- a/cpp/include/raft/neighbors/cagra_types.hpp +++ b/cpp/include/raft/neighbors/cagra_types.hpp @@ -239,8 +239,8 @@ struct index : ann::index { raft::distance::DistanceType metric, mdspan, row_major, data_accessor> dataset, mdspan, row_major, graph_accessor> knn_graph, - bool graph_pinned = true, - bool data_pinned = true) + bool graph_pinned = false, + bool data_pinned = false) : ann::index(), mr_(new rmm::mr::cuda_pinned_resource()), metric_(metric), @@ -253,7 +253,7 @@ struct index : ann::index { "Dataset and knn_graph must have equal number of rows"); if (data_pinned) { // copy with padding - int64_t aligned_dim = AlignDim::roundUp(dataset.extent(1)); + int64_t aligned_dim = round_up_safe(dataset.extent(1) * sizeof(T), 16) / sizeof(T); dataset_pinned_.resize(dataset.extent(0) * aligned_dim, resource::get_cuda_stream(res)); resource::sync_stream(res); diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-100M.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-100M.json index 2f7aa1b54d..c03b7accb6 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-100M.json +++ 
b/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-100M.json @@ -375,7 +375,7 @@ "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq512-cluster1024-float-float" }, { - "name": "raft_cagra.dim32", + "name": "raft_cagra.dim32.single_cta", "algo": "raft_cagra", "build_param": {"graph_degree": 32, "intermediate_graph_degree": 48}, "file": "/workspace1/index/cagra/deep100m_n48_fp32.opt32", @@ -416,7 +416,7 @@ }, { "name": "raft_cagra.dim32.multi_kernel", - "algo": "raft_cagra", + "algo": "cagra", "build_param": {"graph_degree": 32, "intermediate_graph_degree": 48}, "file": "/workspace1/index/cagra/deep100m_n48_fp32.opt32", "search_params": [ From c63cbcdf315323adc777e53c844045a10630ec0a Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Mon, 25 Sep 2023 00:41:31 +0200 Subject: [PATCH 20/57] update readme --- README.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/README.md b/README.md index 56d422b489..fc512cd0da 100755 --- a/README.md +++ b/README.md @@ -1,3 +1,25 @@ +# RAFT CAGRA vs ORIGINAL CAGRA Benchmarks + +This branch adds a wrapper to ANN bench to call the original (standalone) cagra code. + +## Compile + +1. Set CAGRA path in [CMakeLists.txt](https://gitlab-master.nvidia.com/tfeher/raft-fork/-/blob/cagra_pin_dataset_head/cpp/bench/ann/CMakeLists.txt#L215-L216) +2. Compile with RAFT_CAGRA benchmarks enabled + +## Benchmark + +1. Set optimized [graph path here](https://gitlab-master.nvidia.com/tfeher/raft-fork/-/blob/cagra_pin_dataset_head/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-100M.json#L381). This has to be generated with the original CAGRA code. +2. To run original CAGRA, set algorithm parameter to "cagra" [here](https://gitlab-master.nvidia.com/tfeher/raft-fork/-/blob/cagra_pin_dataset_head/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-100M.json#L379). To run RAFT CAGRA, use "raft_cagra" as the "algo" param. + +3. `export LD_LIBRARY_PATH=/path/to/cagra/lib:$LD_LIBRARY_PATH` + +4.
run benchmark +``` +./RAFT_CAGRA_ANN_BENCH --search --overwrite --data_prefix=/data/ --benchmark_filter=cagra.dim32.*cta --benchmark_out_format=csv --benchmark_out=res_cagra.csv --override_kv=n_queries:1 /workspace1/raft/python/raft-ann-bench//src/raft-ann-bench/run/conf/deep-100M.json +``` + +# ORIGINAL RAFT README BELOW #
 RAFT: Reusable Accelerated Functions and Tools for Vector Search and More
![RAFT tech stack](img/raft-tech-stack-vss.png) From 1ec75baa8fc21f595a4f11377ccae923acaca45a Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 25 Sep 2023 15:31:26 -0400 Subject: [PATCH 21/57] Using consistent naming for faiss algos --- cpp/bench/ann/CMakeLists.txt | 40 +++--- .../ann/src/faiss/faiss_cpu_benchmark.cpp | 32 ++--- cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h | 77 ++++------- ...ss_benchmark.cu => faiss_gpu_benchmark.cu} | 2 +- .../{faiss_wrapper.h => faiss_gpu_wrapper.h} | 23 ++-- .../src/raft-ann-bench/run/algos.yaml | 8 +- .../raft-ann-bench/run/conf/deep-100M.json | 28 ++-- .../src/raft-ann-bench/run/conf/deep-1B.json | 4 +- .../run/conf/deep-image-96-inner.json | 108 +++++++-------- .../run/conf/fashion-mnist-784-euclidean.json | 108 +++++++-------- .../run/conf/gist-960-euclidean.json | 108 +++++++-------- .../run/conf/glove-100-angular.json | 108 +++++++-------- .../run/conf/glove-100-inner.json | 124 +++++++++--------- .../run/conf/glove-50-angular.json | 108 +++++++-------- .../run/conf/lastfm-65-angular.json | 108 +++++++-------- .../run/conf/mnist-784-euclidean.json | 108 +++++++-------- .../run/conf/nytimes-256-angular.json | 108 +++++++-------- .../run/conf/sift-128-euclidean.json | 72 +++++----- 18 files changed, 625 insertions(+), 649 deletions(-) rename cpp/bench/ann/src/faiss/{faiss_benchmark.cu => faiss_gpu_benchmark.cu} (99%) rename cpp/bench/ann/src/faiss/{faiss_wrapper.h => faiss_gpu_wrapper.h} (96%) diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 7017910c24..4c294c6e0b 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -15,9 +15,9 @@ # ################################################################################################## # * benchmark options ------------------------------------------------------------------------------ -option(RAFT_ANN_BENCH_USE_FAISS_FLAT "Include faiss' brute-force knn algorithm in benchmark" ON) 
-option(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT "Include faiss' ivf flat algorithm in benchmark" ON) -option(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ "Include faiss' ivf pq algorithm in benchmark" ON) +option(RAFT_ANN_BENCH_USE_FAISS_GPU_FLAT "Include faiss' brute-force knn algorithm in benchmark" ON) +option(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT "Include faiss' ivf flat algorithm in benchmark" ON) +option(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_PQ "Include faiss' ivf pq algorithm in benchmark" ON) option(RAFT_ANN_BENCH_USE_FAISS_CPU_FLAT "Include faiss' cpu brute-force knn algorithm in benchmark" ON ) @@ -48,9 +48,9 @@ if(BUILD_CPU_ONLY) include(cmake/thirdparty/get_spdlog.cmake) set(RAFT_FAISS_ENABLE_GPU OFF) - set(RAFT_ANN_BENCH_USE_FAISS_FLAT OFF) - set(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT OFF) - set(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ OFF) + set(RAFT_ANN_BENCH_USE_FAISS_GPU_FLAT OFF) + set(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT OFF) + set(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_PQ OFF) set(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT OFF) set(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ OFF) set(RAFT_ANN_BENCH_USE_RAFT_CAGRA OFF) @@ -60,17 +60,17 @@ else() # https://github.com/rapidsai/raft/issues/1627 if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0.0) set(RAFT_FAISS_ENABLE_GPU OFF) - set(RAFT_ANN_BENCH_USE_FAISS_FLAT OFF) - set(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT OFF) - set(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ OFF) + set(RAFT_ANN_BENCH_USE_FAISS_GPU_FLAT OFF) + set(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT OFF) + set(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_PQ OFF) else() set(RAFT_FAISS_ENABLE_GPU ON) endif() endif() set(RAFT_ANN_BENCH_USE_FAISS OFF) -if(RAFT_ANN_BENCH_USE_FAISS_FLAT - OR RAFT_ANN_BENCH_USE_FAISS_IVFPQ +if(RAFT_ANN_BENCH_USE_FAISS_GPU_FLAT + OR RAFT_ANN_BENCH_USE_FAISS_GPU_IVFPQ OR RAFT_ANN_BENCH_USE_FAISS_IFFLAT OR RAFT_ANN_BENCH_USE_FAISS_CPU_FLAT OR RAFT_ANN_BENCH_USE_FAISS_CPU_IVFPQ @@ -243,8 +243,8 @@ if(RAFT_ANN_BENCH_USE_RAFT_CAGRA) endif() set(RAFT_FAISS_TARGETS faiss::faiss) -if(CXX_AVX_FOUND) - 
list(APPEND RAFT_FAISS_TARGETS faiss::faiss_avx2) +if(TARGET faiss::faiss_avx2) + set(RAFT_FAISS_TARGETS faiss::faiss_avx2) endif() if(RAFT_ANN_BENCH_USE_FAISS_CPU_FLAT) @@ -268,21 +268,23 @@ if(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_PQ) ) endif() -if(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT) +if(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT) ConfigureAnnBench( - NAME FAISS_IVF_FLAT PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS ${RAFT_FAISS_TARGETS} + NAME FAISS_GPU_IVF_FLAT PATH bench/ann/src/faiss/faiss_gpu_benchmark.cu LINKS + ${RAFT_FAISS_TARGETS} ) endif() -if(RAFT_ANN_BENCH_USE_FAISS_IVF_PQ) +if(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_PQ) ConfigureAnnBench( - NAME FAISS_IVF_PQ PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS ${RAFT_FAISS_TARGETS} + NAME FAISS_GPU_IVF_PQ PATH bench/ann/src/faiss/faiss_gpu_benchmark.cu LINKS + ${RAFT_FAISS_TARGETS} ) endif() -if(RAFT_ANN_BENCH_USE_FAISS_FLAT) +if(RAFT_ANN_BENCH_USE_FAISS_GPU_FLAT) ConfigureAnnBench( - NAME FAISS_FLAT PATH bench/ann/src/faiss/faiss_benchmark.cu LINKS ${RAFT_FAISS_TARGETS} + NAME FAISS_GPU_FLAT PATH bench/ann/src/faiss/faiss_gpu_benchmark.cu LINKS ${RAFT_FAISS_TARGETS} ) endif() diff --git a/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp b/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp index 7d1ba726bb..0552e8fa36 100644 --- a/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp +++ b/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp @@ -31,7 +31,7 @@ namespace raft::bench::ann { template void parse_base_build_param(const nlohmann::json& conf, - typename raft::bench::ann::Faiss::BuildParam& param) + typename raft::bench::ann::FaissCpu::BuildParam& param) { param.nlist = conf.at("nlist"); if (conf.contains("ratio")) { param.ratio = conf.at("ratio"); } @@ -39,14 +39,14 @@ void parse_base_build_param(const nlohmann::json& conf, template void parse_build_param(const nlohmann::json& conf, - typename raft::bench::ann::FaissIVFFlat::BuildParam& param) + typename raft::bench::ann::FaissCpuIVFFlat::BuildParam& param) { 
parse_base_build_param(conf, param); } template void parse_build_param(const nlohmann::json& conf, - typename raft::bench::ann::FaissIVFPQ::BuildParam& param) + typename raft::bench::ann::FaissCpuIVFPQ::BuildParam& param) { parse_base_build_param(conf, param); param.M = conf.at("M"); @@ -64,7 +64,7 @@ void parse_build_param(const nlohmann::json& conf, template void parse_build_param(const nlohmann::json& conf, - typename raft::bench::ann::FaissIVFSQ::BuildParam& param) + typename raft::bench::ann::FaissCpuIVFSQ::BuildParam& param) { parse_base_build_param(conf, param); param.quantizer_type = conf.at("quantizer_type"); @@ -72,7 +72,7 @@ void parse_build_param(const nlohmann::json& conf, template void parse_search_param(const nlohmann::json& conf, - typename raft::bench::ann::Faiss::SearchParam& param) + typename raft::bench::ann::FaissCpu::SearchParam& param) { param.nprobe = conf.at("nprobe"); if (conf.contains("refine_ratio")) { param.refine_ratio = conf.at("refine_ratio"); } @@ -115,14 +115,14 @@ std::unique_ptr> create_algo(const std::string& algo, if constexpr (std::is_same_v) { raft::bench::ann::Metric metric = parse_metric(distance); - if (algo == "faiss_ivf_flat") { - ann = make_algo(metric, dim, conf, dev_list); - } else if (algo == "faiss_ivf_pq") { - ann = make_algo(metric, dim, conf); - } else if (algo == "faiss_ivf_sq") { - ann = make_algo(metric, dim, conf); - } else if (algo == "faiss_flat") { - ann = std::make_unique>(metric, dim); + if (algo == "faiss_gpu_ivf_flat") { + ann = make_algo(metric, dim, conf, dev_list); + } else if (algo == "faiss_gpu_ivf_pq") { + ann = make_algo(metric, dim, conf); + } else if (algo == "faiss_gpu_ivf_sq") { + ann = make_algo(metric, dim, conf); + } else if (algo == "faiss_gpu_flat") { + ann = std::make_unique>(metric, dim); } } @@ -137,11 +137,11 @@ template std::unique_ptr::AnnSearchParam> create_search_param( const std::string& algo, const nlohmann::json& conf) { - if (algo == "faiss_ivf_flat" || algo == 
"faiss_ivf_pq" || algo == "faiss_ivf_sq") { - auto param = std::make_unique::SearchParam>(); + if (algo == "faiss_gpu_ivf_flat" || algo == "faiss_gpu_ivf_pq" || algo == "faiss_gpu_ivf_sq") { + auto param = std::make_unique::SearchParam>(); parse_search_param(conf, *param); return param; - } else if (algo == "faiss_flat") { + } else if (algo == "faiss_gpu_flat") { auto param = std::make_unique::AnnSearchParam>(); return param; } diff --git a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h index a96d314e2a..3a78ca1724 100644 --- a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h +++ b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h @@ -24,7 +24,6 @@ #include #include #include -#include #include #include @@ -44,32 +43,12 @@ faiss::MetricType parse_metric_type(raft::bench::ann::Metric metric) throw std::runtime_error("faiss supports only metric type of inner product and L2"); } } - -// note BLAS library can still use multi-threading, and -// setting environment variable like OPENBLAS_NUM_THREADS can control it -class OmpSingleThreadScope { - public: - OmpSingleThreadScope() - { - max_threads_ = omp_get_max_threads(); - omp_set_num_threads(1); - } - ~OmpSingleThreadScope() - { - // the best we can do - omp_set_num_threads(max_threads_); - } - - private: - int max_threads_; -}; - } // namespace namespace raft::bench::ann { template -class Faiss : public ANN { +class FaissCpu : public ANN { public: using typename ANN::AnnSearchParam; struct SearchParam : public AnnSearchParam { @@ -82,7 +61,7 @@ class Faiss : public ANN { int ratio = 2; }; - Faiss(Metric metric, int dim, const BuildParam& param) + FaissCpu(Metric metric, int dim, const BuildParam& param) : ANN(metric, dim), metric_type_(parse_metric_type(metric)), nlist_{param.nlist}, @@ -91,7 +70,7 @@ class Faiss : public ANN { static_assert(std::is_same_v, "faiss support only float type"); } - virtual ~Faiss() noexcept {} + virtual ~FaissCpu() noexcept {} void build(const T* dataset, 
size_t nrow, cudaStream_t stream = 0) final; @@ -140,9 +119,8 @@ class Faiss : public ANN { }; template -void Faiss::build(const T* dataset, size_t nrow, cudaStream_t stream) +void FaissCpu::build(const T* dataset, size_t nrow, cudaStream_t stream) { - OmpSingleThreadScope omp_single_thread; auto index_ivf = dynamic_cast(index_.get()); if (index_ivf != nullptr) { // set the min/max training size for clustering to use the whole provided training set. @@ -171,7 +149,7 @@ void Faiss::build(const T* dataset, size_t nrow, cudaStream_t stream) } template -void Faiss::set_search_param(const AnnSearchParam& param) +void FaissCpu::set_search_param(const AnnSearchParam& param) { auto search_param = dynamic_cast(param); int nprobe = search_param.nprobe; @@ -185,12 +163,12 @@ void Faiss::set_search_param(const AnnSearchParam& param) } template -void Faiss::search(const T* queries, - int batch_size, - int k, - size_t* neighbors, - float* distances, - cudaStream_t stream) const +void FaissCpu::search(const T* queries, + int batch_size, + int k, + size_t* neighbors, + float* distances, + cudaStream_t stream) const { static_assert(sizeof(size_t) == sizeof(faiss::idx_t), "sizes of size_t and faiss::idx_t are different"); @@ -199,26 +177,24 @@ void Faiss::search(const T* queries, template template -void Faiss::save_(const std::string& file) const +void FaissCpu::save_(const std::string& file) const { - OmpSingleThreadScope omp_single_thread; faiss::write_index(index_.get(), file.c_str()); } template template -void Faiss::load_(const std::string& file) +void FaissCpu::load_(const std::string& file) { - OmpSingleThreadScope omp_single_thread; index_ = std::unique_ptr(dynamic_cast(faiss::read_index(file.c_str()))); } template -class FaissIVFFlat : public Faiss { +class FaissCpuIVFFlat : public FaissCpu { public: - using typename Faiss::BuildParam; + using typename FaissCpu::BuildParam; - FaissIVFFlat(Metric metric, int dim, const BuildParam& param) : Faiss(metric, dim, param) + 
FaissCpuIVFFlat(Metric metric, int dim, const BuildParam& param) : FaissCpu(metric, dim, param) { this->init_quantizer(dim); this->index_ = std::make_unique( @@ -233,15 +209,15 @@ class FaissIVFFlat : public Faiss { }; template -class FaissIVFPQ : public Faiss { +class FaissCpuIVFPQ : public FaissCpu { public: - struct BuildParam : public Faiss::BuildParam { + struct BuildParam : public FaissCpu::BuildParam { int M; int bitsPerCode; bool usePrecomputed; }; - FaissIVFPQ(Metric metric, int dim, const BuildParam& param) : Faiss(metric, dim, param) + FaissCpuIVFPQ(Metric metric, int dim, const BuildParam& param) : FaissCpu(metric, dim, param) { this->init_quantizer(dim); this->index_ = std::make_unique( @@ -256,13 +232,13 @@ class FaissIVFPQ : public Faiss { }; template -class FaissIVFSQ : public Faiss { +class FaissCpuIVFSQ : public FaissCpu { public: - struct BuildParam : public Faiss::BuildParam { + struct BuildParam : public FaissCpu::BuildParam { std::string quantizer_type; }; - FaissIVFSQ(Metric metric, int dim, const BuildParam& param) : Faiss(metric, dim, param) + FaissCpuIVFSQ(Metric metric, int dim, const BuildParam& param) : FaissCpu(metric, dim, param) { faiss::ScalarQuantizer::QuantizerType qtype; if (param.quantizer_type == "fp16") { @@ -270,7 +246,7 @@ class FaissIVFSQ : public Faiss { } else if (param.quantizer_type == "int8") { qtype = faiss::ScalarQuantizer::QT_8bit; } else { - throw std::runtime_error("FaissIVFSQ supports only fp16 and int8 but got " + + throw std::runtime_error("FaissCpuIVFSQ supports only fp16 and int8 but got " + param.quantizer_type); } @@ -290,14 +266,15 @@ class FaissIVFSQ : public Faiss { }; template -class FaissFlat : public Faiss { +class FaissCpuFlat : public FaissCpu { public: - FaissFlat(Metric metric, int dim) : Faiss(metric, dim, typename Faiss::BuildParam{}) + FaissCpuFlat(Metric metric, int dim) + : FaissCpu(metric, dim, typename FaissCpu::BuildParam{}) { this->index_ = std::make_unique(dim, this->metric_type_); } - 
// class Faiss is more like a IVF class, so need special treating here + // class FaissCpu is more like a IVF class, so need special treating here void set_search_param(const typename ANN::AnnSearchParam&) override{}; void save(const std::string& file) const override diff --git a/cpp/bench/ann/src/faiss/faiss_benchmark.cu b/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu similarity index 99% rename from cpp/bench/ann/src/faiss/faiss_benchmark.cu rename to cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu index 56885cce5c..8b04ba1980 100644 --- a/cpp/bench/ann/src/faiss/faiss_benchmark.cu +++ b/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu @@ -24,7 +24,7 @@ #include "../common/ann_types.hpp" #undef WARP_SIZE -#include "faiss_wrapper.h" +#include "faiss_gpu_wrapper.h" #define JSON_DIAGNOSTICS 1 #include diff --git a/cpp/bench/ann/src/faiss/faiss_wrapper.h b/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h similarity index 96% rename from cpp/bench/ann/src/faiss/faiss_wrapper.h rename to cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h index d98f1d347c..6144f89bd4 100644 --- a/cpp/bench/ann/src/faiss/faiss_wrapper.h +++ b/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h @@ -13,7 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#pragma once +#ifndef FAISS_WRAPPER_H_ +#define FAISS_WRAPPER_H_ + #include "../common/ann_types.hpp" #include @@ -105,7 +107,7 @@ class FaissGpu : public ANN { void build(const T* dataset, size_t nrow, cudaStream_t stream = 0) final; - virtual void set_search_param(const AnnSearchParam& param) {} + virtual void set_search_param(const FaissGpu::AnnSearchParam& param) {} // TODO: if the number of results is less than k, the remaining elements of 'neighbors' // will be filled with (size_t)-1 @@ -192,12 +194,7 @@ void FaissGpu::search(const T* queries, { static_assert(sizeof(size_t) == sizeof(faiss::idx_t), "sizes of size_t and faiss::idx_t are different"); - index_->search(batch_size, - queries, - k, - distances, - reinterpret_cast(neighbors), - search_params_.get()); + index_->search(batch_size, queries, k, distances, reinterpret_cast(neighbors)); stream_wait(stream); } @@ -317,8 +314,6 @@ class FaissGpuIVFPQ : public FaissGpu { template class FaissGpuIVFSQ : public FaissGpu { public: - using typename FaissGpu::AnnSearchParam; - using typename FaissGpu::SearchParam; struct BuildParam : public FaissGpu::BuildParam { std::string quantizer_type; }; @@ -341,9 +336,9 @@ class FaissGpuIVFSQ : public FaissGpu { &(this->gpu_resource_), dim, param.nlist, qtype, this->metric_type_, true, config); } - virtual void set_search_param(const typename FaissGpu::AnnSearchParam& param) override + void set_search_param(const typename FaissGpu::AnnSearchParam& param) override { - auto search_param = dynamic_cast(param); + auto search_param = dynamic_cast::SearchParam&>(param); int nprobe = search_param.nprobe; assert(nprobe <= nlist_); @@ -381,7 +376,6 @@ class FaissGpuFlat : public FaissGpu { this->index_ = std::make_unique( &(this->gpu_resource_), dim, this->metric_type_, config); } - void set_search_param(const typename FaissGpu::AnnSearchParam& param) override { auto search_param = dynamic_cast::SearchParam&>(param); @@ -390,6 +384,7 @@ class FaissGpuFlat : public FaissGpu { 
this->search_params_ = std::make_unique(); } + void save(const std::string& file) const override { this->template save_(file); @@ -401,3 +396,5 @@ class FaissGpuFlat : public FaissGpu { }; } // namespace raft::bench::ann + +#endif \ No newline at end of file diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/algos.yaml b/python/raft-ann-bench/src/raft-ann-bench/run/algos.yaml index 247e2cc4a4..f4f928505f 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/algos.yaml +++ b/python/raft-ann-bench/src/raft-ann-bench/run/algos.yaml @@ -1,14 +1,14 @@ faiss_gpu_flat: - executable: FAISS_FLAT_ANN_BENCH + executable: FAISS_GPU_FLAT_ANN_BENCH requires_gpu: true faiss_gpu_ivf_flat: - executable: FAISS_IVF_FLAT_ANN_BENCH + executable: FAISS_GPU_IVF_FLAT_ANN_BENCH requires_gpu: true faiss_gpu_ivf_pq: - executable: FAISS_IVF_PQ_ANN_BENCH + executable: FAISS_GPU_IVF_PQ_ANN_BENCH requires_gpu: true faiss_gpu_ivf_sq: - executable: FAISS_IVF_PQ_ANN_BENCH + executable: FAISS_GPU_IVF_PQ_ANN_BENCH requires_gpu: true faiss_flat: executable: FAISS_CPU_FLAT_ANN_BENCH diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-100M.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-100M.json index bc77b522a8..3885876022 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-100M.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-100M.json @@ -87,10 +87,10 @@ ] }, { - "name": "faiss_ivf_flat.nlist50K", + "name": "faiss_gpu_ivf_flat.nlist50K", "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":50000}, - "file": "deep-100M/faiss_ivf_flat/nlist50K", + "file": "deep-100M/faiss_gpu_ivf_flat/nlist50K", "search_params": [ {"nprobe":20}, {"nprobe":30}, @@ -103,10 +103,10 @@ ] }, { - "name": "faiss_ivf_flat.nlist100K", + "name": "faiss_gpu_ivf_flat.nlist100K", "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":100000}, - "file": "deep-100M/faiss_ivf_flat/nlist100K", + "file": "deep-100M/faiss_gpu_ivf_flat/nlist100K", "search_params": 
[ {"nprobe":20}, {"nprobe":30}, @@ -119,10 +119,10 @@ ] }, { - "name": "faiss_ivf_flat.nlist200K", + "name": "faiss_gpu_ivf_flat.nlist200K", "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":200000}, - "file": "deep-100M/faiss_ivf_flat/nlist200K", + "file": "deep-100M/faiss_gpu_ivf_flat/nlist200K", "search_params": [ {"nprobe":20}, {"nprobe":30}, @@ -135,10 +135,10 @@ ] }, { - "name": "faiss_ivf_pq.M48-nlist16K", + "name": "faiss_gpu_ivf_pq.M48-nlist16K", "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":16384, "M":48}, - "file": "deep-100M/faiss_ivf_pq/M48-nlist16K", + "file": "deep-100M/faiss_gpu_ivf_pq/M48-nlist16K", "search_params": [ {"nprobe":10}, {"nprobe":20}, @@ -151,10 +151,10 @@ ] }, { - "name": "faiss_ivf_pq.M48-nlist50K", + "name": "faiss_gpu_ivf_pq.M48-nlist50K", "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":50000, "M":48}, - "file": "deep-100M/faiss_ivf_pq/M48-nlist50K", + "file": "deep-100M/faiss_gpu_ivf_pq/M48-nlist50K", "search_params": [ {"nprobe":20}, {"nprobe":30}, @@ -167,10 +167,10 @@ ] }, { - "name": "faiss_ivf_pq.M48-nlist100K", + "name": "faiss_gpu_ivf_pq.M48-nlist100K", "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":100000, "M":48}, - "file": "deep-100M/faiss_ivf_pq/M48-nlist100K", + "file": "deep-100M/faiss_gpu_ivf_pq/M48-nlist100K", "search_params": [ {"nprobe":20}, {"nprobe":30}, @@ -183,10 +183,10 @@ ] }, { - "name": "faiss_ivf_pq.M48-nlist200K", + "name": "faiss_gpu_ivf_pq.M48-nlist200K", "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":200000, "M":48}, - "file": "deep-100M/faiss_ivf_pq/M48-nlist200K", + "file": "deep-100M/faiss_gpu_ivf_pq/M48-nlist200K", "search_params": [ {"nprobe":20}, {"nprobe":30}, diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-1B.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-1B.json index 632d2f7308..e5190e073e 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-1B.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-1B.json 
@@ -14,10 +14,10 @@ "index": [ { - "name": "faiss_ivf_pq.M48-nlist50K", + "name": "faiss_gpu_ivf_pq.M48-nlist50K", "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":50000, "M":48}, - "file": "deep-1B/faiss_ivf_pq/M48-nlist50K", + "file": "deep-1B/faiss_gpu_ivf_pq/M48-nlist50K", "search_params": [ {"nprobe":1}, {"nprobe":5}, diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-image-96-inner.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-image-96-inner.json index f1c033e415..79610e4f5b 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-image-96-inner.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-image-96-inner.json @@ -105,12 +105,12 @@ "search_result_file": "result/deep-image-96-inner/raft_bfknn/bfknn" }, { - "name": "faiss_ivf_flat.nlist1024", + "name": "faiss_gpu_ivf_flat.nlist1024", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 1024 }, - "file": "index/deep-image-96-inner/faiss_ivf_flat/nlist1024", + "file": "index/deep-image-96-inner/faiss_gpu_ivf_flat/nlist1024", "search_params": [ { "nprobe": 1 @@ -137,15 +137,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-inner/faiss_ivf_flat/nlist1024" + "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_flat/nlist1024" }, { - "name": "faiss_ivf_flat.nlist2048", + "name": "faiss_gpu_ivf_flat.nlist2048", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 2048 }, - "file": "index/deep-image-96-inner/faiss_ivf_flat/nlist2048", + "file": "index/deep-image-96-inner/faiss_gpu_ivf_flat/nlist2048", "search_params": [ { "nprobe": 1 @@ -172,15 +172,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-inner/faiss_ivf_flat/nlist2048" + "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_flat/nlist2048" }, { - "name": "faiss_ivf_flat.nlist4096", + "name": "faiss_gpu_ivf_flat.nlist4096", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 4096 }, - "file": 
"index/deep-image-96-inner/faiss_ivf_flat/nlist4096", + "file": "index/deep-image-96-inner/faiss_gpu_ivf_flat/nlist4096", "search_params": [ { "nprobe": 1 @@ -207,15 +207,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-inner/faiss_ivf_flat/nlist4096" + "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_flat/nlist4096" }, { - "name": "faiss_ivf_flat.nlist8192", + "name": "faiss_gpu_ivf_flat.nlist8192", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 8192 }, - "file": "index/deep-image-96-inner/faiss_ivf_flat/nlist8192", + "file": "index/deep-image-96-inner/faiss_gpu_ivf_flat/nlist8192", "search_params": [ { "nprobe": 1 @@ -242,15 +242,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-inner/faiss_ivf_flat/nlist8192" + "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_flat/nlist8192" }, { - "name": "faiss_ivf_flat.nlist16384", + "name": "faiss_gpu_ivf_flat.nlist16384", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 16384 }, - "file": "index/deep-image-96-inner/faiss_ivf_flat/nlist16384", + "file": "index/deep-image-96-inner/faiss_gpu_ivf_flat/nlist16384", "search_params": [ { "nprobe": 1 @@ -280,10 +280,10 @@ "nprobe": 2000 } ], - "search_result_file": "result/deep-image-96-inner/faiss_ivf_flat/nlist16384" + "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_flat/nlist16384" }, { - "name": "faiss_ivf_pq.M64-nlist1024", + "name": "faiss_gpu_ivf_pq.M64-nlist1024", "algo": "faiss_gpu_ivf_pq", "build_param": { "nlist": 1024, @@ -291,7 +291,7 @@ "useFloat16": true, "usePrecomputed": true }, - "file": "index/deep-image-96-inner/faiss_ivf_pq/M64-nlist1024", + "file": "index/deep-image-96-inner/faiss_gpu_ivf_pq/M64-nlist1024", "search_params": [ {"nprobe": 10}, {"nprobe": 50}, @@ -300,10 +300,10 @@ {"nprobe": 500}, {"nprobe": 1000} ], - "search_result_file": "result/deep-image-96-inner/faiss_ivf_pq/M64-nlist1024" + "search_result_file": 
"result/deep-image-96-inner/faiss_gpu_ivf_pq/M64-nlist1024" }, { - "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", + "name": "faiss_gpu_ivf_pq.M64-nlist1024.noprecomp", "algo": "faiss_gpu_ivf_pq", "build_param": { "nlist": 1024, @@ -311,7 +311,7 @@ "useFloat16": true, "usePrecomputed": false }, - "file": "index/deep-image-96-inner/faiss_ivf_pq/M64-nlist1024.noprecomp", + "file": "index/deep-image-96-inner/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp", "search_params": [ {"nprobe": 10}, {"nprobe": 50}, @@ -320,16 +320,16 @@ {"nprobe": 500}, {"nprobe": 1000} ], - "search_result_file": "result/deep-image-96-inner/faiss_ivf_pq/M64-nlist1024" + "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_pq/M64-nlist1024" }, { - "name": "faiss_ivf_sq.nlist1024-fp16", + "name": "faiss_gpu_ivf_sq.nlist1024-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 1024, "quantizer_type": "fp16" }, - "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist1024-fp16", + "file": "index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist1024-fp16", "search_params": [ { "nprobe": 1 @@ -356,16 +356,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist1024-fp16" + "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist1024-fp16" }, { - "name": "faiss_ivf_sq.nlist2048-fp16", + "name": "faiss_gpu_ivf_sq.nlist2048-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 2048, "quantizer_type": "fp16" }, - "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist2048-fp16", + "file": "index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist2048-fp16", "search_params": [ { "nprobe": 1 @@ -392,16 +392,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist2048-fp16" + "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist2048-fp16" }, { - "name": "faiss_ivf_sq.nlist4096-fp16", + "name": "faiss_gpu_ivf_sq.nlist4096-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 4096, 
"quantizer_type": "fp16" }, - "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist4096-fp16", + "file": "index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist4096-fp16", "search_params": [ { "nprobe": 1 @@ -428,16 +428,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist4096-fp16" + "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist4096-fp16" }, { - "name": "faiss_ivf_sq.nlist8192-fp16", + "name": "faiss_gpu_ivf_sq.nlist8192-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 8192, "quantizer_type": "fp16" }, - "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist8192-fp16", + "file": "index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist8192-fp16", "search_params": [ { "nprobe": 1 @@ -464,16 +464,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist8192-fp16" + "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist8192-fp16" }, { - "name": "faiss_ivf_sq.nlist16384-fp16", + "name": "faiss_gpu_ivf_sq.nlist16384-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 16384, "quantizer_type": "fp16" }, - "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist16384-fp16", + "file": "index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist16384-fp16", "search_params": [ { "nprobe": 1 @@ -503,16 +503,16 @@ "nprobe": 2000 } ], - "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist16384-fp16" + "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist16384-fp16" }, { - "name": "faiss_ivf_sq.nlist1024-int8", + "name": "faiss_gpu_ivf_sq.nlist1024-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 1024, "quantizer_type": "int8" }, - "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist1024-int8", + "file": "index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist1024-int8", "search_params": [ { "nprobe": 1 @@ -539,16 +539,16 @@ "nprobe": 1000 } ], - "search_result_file": 
"result/deep-image-96-inner/faiss_ivf_sq/nlist1024-int8" + "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist1024-int8" }, { - "name": "faiss_ivf_sq.nlist2048-int8", + "name": "faiss_gpu_ivf_sq.nlist2048-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 2048, "quantizer_type": "int8" }, - "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist2048-int8", + "file": "index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist2048-int8", "search_params": [ { "nprobe": 1 @@ -575,16 +575,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist2048-int8" + "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist2048-int8" }, { - "name": "faiss_ivf_sq.nlist4096-int8", + "name": "faiss_gpu_ivf_sq.nlist4096-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 4096, "quantizer_type": "int8" }, - "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist4096-int8", + "file": "index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist4096-int8", "search_params": [ { "nprobe": 1 @@ -611,16 +611,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist4096-int8" + "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist4096-int8" }, { - "name": "faiss_ivf_sq.nlist8192-int8", + "name": "faiss_gpu_ivf_sq.nlist8192-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 8192, "quantizer_type": "int8" }, - "file": "index/deep-image-96-inner/faiss_ivf_sq/nlist8192-int8", + "file": "index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist8192-int8", "search_params": [ { "nprobe": 1 @@ -647,16 +647,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist8192-int8" + "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist8192-int8" }, { - "name": "faiss_ivf_sq.nlist16384-int8", + "name": "faiss_gpu_ivf_sq.nlist16384-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 16384, "quantizer_type": "int8" }, - 
"file": "index/deep-image-96-inner/faiss_ivf_sq/nlist16384-int8", + "file": "index/deep-image-96-inner/faiss_gpu_ivf_sq/nlist16384-int8", "search_params": [ { "nprobe": 1 @@ -686,17 +686,17 @@ "nprobe": 2000 } ], - "search_result_file": "result/deep-image-96-inner/faiss_ivf_sq/nlist16384-int8" + "search_result_file": "result/deep-image-96-inner/faiss_gpu_ivf_sq/nlist16384-int8" }, { - "name": "faiss_flat", + "name": "faiss_gpu_flat", "algo": "faiss_gpu_flat", "build_param": {}, - "file": "index/deep-image-96-inner/faiss_flat/flat", + "file": "index/deep-image-96-inner/faiss_gpu_flat/flat", "search_params": [ {} ], - "search_result_file": "result/deep-image-96-inner/faiss_flat/flat" + "search_result_file": "result/deep-image-96-inner/faiss_gpu_flat/flat" }, { diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json index 65f28fc81a..25d9751497 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/fashion-mnist-784-euclidean.json @@ -104,12 +104,12 @@ "search_result_file": "result/fashion-mnist-784-euclidean/raft_bfknn/bfknn" }, { - "name": "faiss_ivf_flat.nlist1024", + "name": "faiss_gpu_ivf_flat.nlist1024", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 1024 }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist1024", + "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist1024", "search_params": [ { "nprobe": 1 @@ -136,15 +136,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist1024" + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist1024" }, { - "name": "faiss_ivf_flat.nlist2048", + "name": "faiss_gpu_ivf_flat.nlist2048", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 2048 }, - "file": 
"index/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist2048", + "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist2048", "search_params": [ { "nprobe": 1 @@ -171,15 +171,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist2048" + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist2048" }, { - "name": "faiss_ivf_flat.nlist4096", + "name": "faiss_gpu_ivf_flat.nlist4096", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 4096 }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist4096", + "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist4096", "search_params": [ { "nprobe": 1 @@ -206,15 +206,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist4096" + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist4096" }, { - "name": "faiss_ivf_flat.nlist8192", + "name": "faiss_gpu_ivf_flat.nlist8192", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 8192 }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist8192", + "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist8192", "search_params": [ { "nprobe": 1 @@ -241,15 +241,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist8192" + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist8192" }, { - "name": "faiss_ivf_flat.nlist16384", + "name": "faiss_gpu_ivf_flat.nlist16384", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 16384 }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist16384", + "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist16384", "search_params": [ { "nprobe": 1 @@ -279,10 +279,10 @@ "nprobe": 2000 } ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_flat/nlist16384" + "search_result_file": 
"result/fashion-mnist-784-euclidean/faiss_gpu_ivf_flat/nlist16384" }, { - "name": "faiss_ivf_pq.M64-nlist1024", + "name": "faiss_gpu_ivf_pq.M64-nlist1024", "algo": "faiss_gpu_ivf_pq", "build_param": { "nlist": 1024, @@ -290,7 +290,7 @@ "useFloat16": true, "usePrecomputed": true }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_pq/M64-nlist1024", + "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_pq/M64-nlist1024", "search_params": [ { "nprobe": 10 @@ -311,10 +311,10 @@ "nprobe": 1000 } ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_pq/M64-nlist1024" + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_pq/M64-nlist1024" }, { - "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", + "name": "faiss_gpu_ivf_pq.M64-nlist1024.noprecomp", "algo": "faiss_gpu_ivf_pq", "build_param": { "nlist": 1024, @@ -322,7 +322,7 @@ "useFloat16": true, "usePrecomputed": false }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_pq/M64-nlist1024.noprecomp", + "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp", "search_params": [ { "nprobe": 10 @@ -343,16 +343,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_pq/M64-nlist1024" + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_pq/M64-nlist1024" }, { - "name": "faiss_ivf_sq.nlist1024-fp16", + "name": "faiss_gpu_ivf_sq.nlist1024-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 1024, "quantizer_type": "fp16" }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist1024-fp16", + "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist1024-fp16", "search_params": [ { "nprobe": 1 @@ -379,16 +379,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist1024-fp16" + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist1024-fp16" }, { - "name": "faiss_ivf_sq.nlist2048-fp16", + "name": 
"faiss_gpu_ivf_sq.nlist2048-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 2048, "quantizer_type": "fp16" }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist2048-fp16", + "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist2048-fp16", "search_params": [ { "nprobe": 1 @@ -415,16 +415,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist2048-fp16" + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist2048-fp16" }, { - "name": "faiss_ivf_sq.nlist4096-fp16", + "name": "faiss_gpu_ivf_sq.nlist4096-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 4096, "quantizer_type": "fp16" }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist4096-fp16", + "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist4096-fp16", "search_params": [ { "nprobe": 1 @@ -451,16 +451,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist4096-fp16" + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist4096-fp16" }, { - "name": "faiss_ivf_sq.nlist8192-fp16", + "name": "faiss_gpu_ivf_sq.nlist8192-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 8192, "quantizer_type": "fp16" }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist8192-fp16", + "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist8192-fp16", "search_params": [ { "nprobe": 1 @@ -487,16 +487,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist8192-fp16" + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist8192-fp16" }, { - "name": "faiss_ivf_sq.nlist16384-fp16", + "name": "faiss_gpu_ivf_sq.nlist16384-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 16384, "quantizer_type": "fp16" }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist16384-fp16", + "file": 
"index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist16384-fp16", "search_params": [ { "nprobe": 1 @@ -526,16 +526,16 @@ "nprobe": 2000 } ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist16384-fp16" + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist16384-fp16" }, { - "name": "faiss_ivf_sq.nlist1024-int8", + "name": "faiss_gpu_ivf_sq.nlist1024-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 1024, "quantizer_type": "int8" }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist1024-int8", + "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist1024-int8", "search_params": [ { "nprobe": 1 @@ -562,16 +562,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist1024-int8" + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist1024-int8" }, { - "name": "faiss_ivf_sq.nlist2048-int8", + "name": "faiss_gpu_ivf_sq.nlist2048-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 2048, "quantizer_type": "int8" }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist2048-int8", + "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist2048-int8", "search_params": [ { "nprobe": 1 @@ -598,16 +598,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist2048-int8" + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist2048-int8" }, { - "name": "faiss_ivf_sq.nlist4096-int8", + "name": "faiss_gpu_ivf_sq.nlist4096-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 4096, "quantizer_type": "int8" }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist4096-int8", + "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist4096-int8", "search_params": [ { "nprobe": 1 @@ -634,16 +634,16 @@ "nprobe": 1000 } ], - "search_result_file": 
"result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist4096-int8" + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist4096-int8" }, { - "name": "faiss_ivf_sq.nlist8192-int8", + "name": "faiss_gpu_ivf_sq.nlist8192-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 8192, "quantizer_type": "int8" }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist8192-int8", + "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist8192-int8", "search_params": [ { "nprobe": 1 @@ -670,16 +670,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist8192-int8" + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist8192-int8" }, { - "name": "faiss_ivf_sq.nlist16384-int8", + "name": "faiss_gpu_ivf_sq.nlist16384-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 16384, "quantizer_type": "int8" }, - "file": "index/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist16384-int8", + "file": "index/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist16384-int8", "search_params": [ { "nprobe": 1 @@ -709,17 +709,17 @@ "nprobe": 2000 } ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_ivf_sq/nlist16384-int8" + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_ivf_sq/nlist16384-int8" }, { - "name": "faiss_flat", + "name": "faiss_gpu_flat", "algo": "faiss_gpu_flat", "build_param": {}, - "file": "index/fashion-mnist-784-euclidean/faiss_flat/flat", + "file": "index/fashion-mnist-784-euclidean/faiss_gpu_flat/flat", "search_params": [ {} ], - "search_result_file": "result/fashion-mnist-784-euclidean/faiss_flat/flat" + "search_result_file": "result/fashion-mnist-784-euclidean/faiss_gpu_flat/flat" }, { diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/gist-960-euclidean.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/gist-960-euclidean.json index b097aa7ca0..3ada85834f 100644 --- 
a/python/raft-ann-bench/src/raft-ann-bench/run/conf/gist-960-euclidean.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/gist-960-euclidean.json @@ -104,12 +104,12 @@ "search_result_file": "result/gist-960-euclidean/raft_bfknn/bfknn" }, { - "name": "faiss_ivf_flat.nlist1024", + "name": "faiss_gpu_ivf_flat.nlist1024", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 1024 }, - "file": "index/gist-960-euclidean/faiss_ivf_flat/nlist1024", + "file": "index/gist-960-euclidean/faiss_gpu_ivf_flat/nlist1024", "search_params": [ { "nprobe": 1 @@ -136,15 +136,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_flat/nlist1024" + "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_flat/nlist1024" }, { - "name": "faiss_ivf_flat.nlist2048", + "name": "faiss_gpu_ivf_flat.nlist2048", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 2048 }, - "file": "index/gist-960-euclidean/faiss_ivf_flat/nlist2048", + "file": "index/gist-960-euclidean/faiss_gpu_ivf_flat/nlist2048", "search_params": [ { "nprobe": 1 @@ -171,15 +171,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_flat/nlist2048" + "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_flat/nlist2048" }, { - "name": "faiss_ivf_flat.nlist4096", + "name": "faiss_gpu_ivf_flat.nlist4096", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 4096 }, - "file": "index/gist-960-euclidean/faiss_ivf_flat/nlist4096", + "file": "index/gist-960-euclidean/faiss_gpu_ivf_flat/nlist4096", "search_params": [ { "nprobe": 1 @@ -206,15 +206,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_flat/nlist4096" + "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_flat/nlist4096" }, { - "name": "faiss_ivf_flat.nlist8192", + "name": "faiss_gpu_ivf_flat.nlist8192", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 8192 }, - "file": "index/gist-960-euclidean/faiss_ivf_flat/nlist8192", + 
"file": "index/gist-960-euclidean/faiss_gpu_ivf_flat/nlist8192", "search_params": [ { "nprobe": 1 @@ -241,15 +241,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_flat/nlist8192" + "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_flat/nlist8192" }, { - "name": "faiss_ivf_flat.nlist16384", + "name": "faiss_gpu_ivf_flat.nlist16384", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 16384 }, - "file": "index/gist-960-euclidean/faiss_ivf_flat/nlist16384", + "file": "index/gist-960-euclidean/faiss_gpu_ivf_flat/nlist16384", "search_params": [ { "nprobe": 1 @@ -279,10 +279,10 @@ "nprobe": 2000 } ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_flat/nlist16384" + "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_flat/nlist16384" }, { - "name": "faiss_ivf_pq.M64-nlist1024", + "name": "faiss_gpu_ivf_pq.M64-nlist1024", "algo": "faiss_gpu_ivf_pq", "build_param": { "nlist": 1024, @@ -290,7 +290,7 @@ "useFloat16": true, "usePrecomputed": true }, - "file": "index/gist-960-euclidean/faiss_ivf_pq/M64-nlist1024", + "file": "index/gist-960-euclidean/faiss_gpu_ivf_pq/M64-nlist1024", "search_params": [ { "nprobe": 10 @@ -311,10 +311,10 @@ "nprobe": 1000 } ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_pq/M64-nlist1024" + "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_pq/M64-nlist1024" }, { - "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", + "name": "faiss_gpu_ivf_pq.M64-nlist1024.noprecomp", "algo": "faiss_gpu_ivf_pq", "build_param": { "nlist": 1024, @@ -322,7 +322,7 @@ "useFloat16": true, "usePrecomputed": false }, - "file": "index/gist-960-euclidean/faiss_ivf_pq/M64-nlist1024.noprecomp", + "file": "index/gist-960-euclidean/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp", "search_params": [ { "nprobe": 10 @@ -343,16 +343,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_pq/M64-nlist1024" + "search_result_file": 
"result/gist-960-euclidean/faiss_gpu_ivf_pq/M64-nlist1024" }, { - "name": "faiss_ivf_sq.nlist1024-fp16", + "name": "faiss_gpu_ivf_sq.nlist1024-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 1024, "quantizer_type": "fp16" }, - "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist1024-fp16", + "file": "index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist1024-fp16", "search_params": [ { "nprobe": 1 @@ -379,16 +379,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist1024-fp16" + "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist1024-fp16" }, { - "name": "faiss_ivf_sq.nlist2048-fp16", + "name": "faiss_gpu_ivf_sq.nlist2048-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 2048, "quantizer_type": "fp16" }, - "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist2048-fp16", + "file": "index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist2048-fp16", "search_params": [ { "nprobe": 1 @@ -415,16 +415,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist2048-fp16" + "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist2048-fp16" }, { - "name": "faiss_ivf_sq.nlist4096-fp16", + "name": "faiss_gpu_ivf_sq.nlist4096-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 4096, "quantizer_type": "fp16" }, - "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist4096-fp16", + "file": "index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist4096-fp16", "search_params": [ { "nprobe": 1 @@ -451,16 +451,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist4096-fp16" + "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist4096-fp16" }, { - "name": "faiss_ivf_sq.nlist8192-fp16", + "name": "faiss_gpu_ivf_sq.nlist8192-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 8192, "quantizer_type": "fp16" }, - "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist8192-fp16", + "file": 
"index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist8192-fp16", "search_params": [ { "nprobe": 1 @@ -487,16 +487,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist8192-fp16" + "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist8192-fp16" }, { - "name": "faiss_ivf_sq.nlist16384-fp16", + "name": "faiss_gpu_ivf_sq.nlist16384-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 16384, "quantizer_type": "fp16" }, - "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist16384-fp16", + "file": "index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist16384-fp16", "search_params": [ { "nprobe": 1 @@ -526,16 +526,16 @@ "nprobe": 2000 } ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist16384-fp16" + "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist16384-fp16" }, { - "name": "faiss_ivf_sq.nlist1024-int8", + "name": "faiss_gpu_ivf_sq.nlist1024-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 1024, "quantizer_type": "int8" }, - "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist1024-int8", + "file": "index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist1024-int8", "search_params": [ { "nprobe": 1 @@ -562,16 +562,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist1024-int8" + "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist1024-int8" }, { - "name": "faiss_ivf_sq.nlist2048-int8", + "name": "faiss_gpu_ivf_sq.nlist2048-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 2048, "quantizer_type": "int8" }, - "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist2048-int8", + "file": "index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist2048-int8", "search_params": [ { "nprobe": 1 @@ -598,16 +598,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist2048-int8" + "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist2048-int8" }, { - "name": 
"faiss_ivf_sq.nlist4096-int8", + "name": "faiss_gpu_ivf_sq.nlist4096-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 4096, "quantizer_type": "int8" }, - "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist4096-int8", + "file": "index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist4096-int8", "search_params": [ { "nprobe": 1 @@ -634,16 +634,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist4096-int8" + "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist4096-int8" }, { - "name": "faiss_ivf_sq.nlist8192-int8", + "name": "faiss_gpu_ivf_sq.nlist8192-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 8192, "quantizer_type": "int8" }, - "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist8192-int8", + "file": "index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist8192-int8", "search_params": [ { "nprobe": 1 @@ -670,16 +670,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist8192-int8" + "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist8192-int8" }, { - "name": "faiss_ivf_sq.nlist16384-int8", + "name": "faiss_gpu_ivf_sq.nlist16384-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 16384, "quantizer_type": "int8" }, - "file": "index/gist-960-euclidean/faiss_ivf_sq/nlist16384-int8", + "file": "index/gist-960-euclidean/faiss_gpu_ivf_sq/nlist16384-int8", "search_params": [ { "nprobe": 1 @@ -709,17 +709,17 @@ "nprobe": 2000 } ], - "search_result_file": "result/gist-960-euclidean/faiss_ivf_sq/nlist16384-int8" + "search_result_file": "result/gist-960-euclidean/faiss_gpu_ivf_sq/nlist16384-int8" }, { - "name": "faiss_flat", + "name": "faiss_gpu_flat", "algo": "faiss_gpu_flat", "build_param": {}, - "file": "index/gist-960-euclidean/faiss_flat/flat", + "file": "index/gist-960-euclidean/faiss_gpu_flat/flat", "search_params": [ {} ], - "search_result_file": "result/gist-960-euclidean/faiss_flat/flat" + "search_result_file": 
"result/gist-960-euclidean/faiss_gpu_flat/flat" }, { diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-angular.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-angular.json index 526aef2db0..e12a30ccd9 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-angular.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-angular.json @@ -104,12 +104,12 @@ "search_result_file": "result/glove-100-angular/raft_bfknn/bfknn" }, { - "name": "faiss_ivf_flat.nlist1024", + "name": "faiss_gpu_ivf_flat.nlist1024", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 1024 }, - "file": "index/glove-100-angular/faiss_ivf_flat/nlist1024", + "file": "index/glove-100-angular/faiss_gpu_ivf_flat/nlist1024", "search_params": [ { "nprobe": 1 @@ -136,15 +136,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-100-angular/faiss_ivf_flat/nlist1024" + "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_flat/nlist1024" }, { - "name": "faiss_ivf_flat.nlist2048", + "name": "faiss_gpu_ivf_flat.nlist2048", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 2048 }, - "file": "index/glove-100-angular/faiss_ivf_flat/nlist2048", + "file": "index/glove-100-angular/faiss_gpu_ivf_flat/nlist2048", "search_params": [ { "nprobe": 1 @@ -171,15 +171,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-100-angular/faiss_ivf_flat/nlist2048" + "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_flat/nlist2048" }, { - "name": "faiss_ivf_flat.nlist4096", + "name": "faiss_gpu_ivf_flat.nlist4096", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 4096 }, - "file": "index/glove-100-angular/faiss_ivf_flat/nlist4096", + "file": "index/glove-100-angular/faiss_gpu_ivf_flat/nlist4096", "search_params": [ { "nprobe": 1 @@ -206,15 +206,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-100-angular/faiss_ivf_flat/nlist4096" + "search_result_file": 
"result/glove-100-angular/faiss_gpu_ivf_flat/nlist4096" }, { - "name": "faiss_ivf_flat.nlist8192", + "name": "faiss_gpu_ivf_flat.nlist8192", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 8192 }, - "file": "index/glove-100-angular/faiss_ivf_flat/nlist8192", + "file": "index/glove-100-angular/faiss_gpu_ivf_flat/nlist8192", "search_params": [ { "nprobe": 1 @@ -241,15 +241,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-100-angular/faiss_ivf_flat/nlist8192" + "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_flat/nlist8192" }, { - "name": "faiss_ivf_flat.nlist16384", + "name": "faiss_gpu_ivf_flat.nlist16384", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 16384 }, - "file": "index/glove-100-angular/faiss_ivf_flat/nlist16384", + "file": "index/glove-100-angular/faiss_gpu_ivf_flat/nlist16384", "search_params": [ { "nprobe": 1 @@ -279,10 +279,10 @@ "nprobe": 2000 } ], - "search_result_file": "result/glove-100-angular/faiss_ivf_flat/nlist16384" + "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_flat/nlist16384" }, { - "name": "faiss_ivf_pq.M64-nlist1024", + "name": "faiss_gpu_ivf_pq.M64-nlist1024", "algo": "faiss_gpu_ivf_pq", "build_param": { "nlist": 1024, @@ -290,7 +290,7 @@ "useFloat16": true, "usePrecomputed": true }, - "file": "index/glove-100-angular/faiss_ivf_pq/M64-nlist1024", + "file": "index/glove-100-angular/faiss_gpu_ivf_pq/M64-nlist1024", "search_params": [ { "nprobe": 10 @@ -311,10 +311,10 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-100-angular/faiss_ivf_pq/M64-nlist1024" + "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_pq/M64-nlist1024" }, { - "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", + "name": "faiss_gpu_ivf_pq.M64-nlist1024.noprecomp", "algo": "faiss_gpu_ivf_pq", "build_param": { "nlist": 1024, @@ -322,7 +322,7 @@ "useFloat16": true, "usePrecomputed": false }, - "file": "index/glove-100-angular/faiss_ivf_pq/M64-nlist1024.noprecomp", + "file": 
"index/glove-100-angular/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp", "search_params": [ { "nprobe": 10 @@ -343,16 +343,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-100-angular/faiss_ivf_pq/M64-nlist1024" + "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_pq/M64-nlist1024" }, { - "name": "faiss_ivf_sq.nlist1024-fp16", + "name": "faiss_gpu_ivf_sq.nlist1024-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 1024, "quantizer_type": "fp16" }, - "file": "index/glove-100-angular/faiss_ivf_sq/nlist1024-fp16", + "file": "index/glove-100-angular/faiss_gpu_ivf_sq/nlist1024-fp16", "search_params": [ { "nprobe": 1 @@ -379,16 +379,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist1024-fp16" + "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_sq/nlist1024-fp16" }, { - "name": "faiss_ivf_sq.nlist2048-fp16", + "name": "faiss_gpu_ivf_sq.nlist2048-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 2048, "quantizer_type": "fp16" }, - "file": "index/glove-100-angular/faiss_ivf_sq/nlist2048-fp16", + "file": "index/glove-100-angular/faiss_gpu_ivf_sq/nlist2048-fp16", "search_params": [ { "nprobe": 1 @@ -415,16 +415,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist2048-fp16" + "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_sq/nlist2048-fp16" }, { - "name": "faiss_ivf_sq.nlist4096-fp16", + "name": "faiss_gpu_ivf_sq.nlist4096-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 4096, "quantizer_type": "fp16" }, - "file": "index/glove-100-angular/faiss_ivf_sq/nlist4096-fp16", + "file": "index/glove-100-angular/faiss_gpu_ivf_sq/nlist4096-fp16", "search_params": [ { "nprobe": 1 @@ -451,16 +451,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist4096-fp16" + "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_sq/nlist4096-fp16" }, { - "name": "faiss_ivf_sq.nlist8192-fp16", 
+ "name": "faiss_gpu_ivf_sq.nlist8192-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 8192, "quantizer_type": "fp16" }, - "file": "index/glove-100-angular/faiss_ivf_sq/nlist8192-fp16", + "file": "index/glove-100-angular/faiss_gpu_ivf_sq/nlist8192-fp16", "search_params": [ { "nprobe": 1 @@ -487,16 +487,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist8192-fp16" + "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_sq/nlist8192-fp16" }, { - "name": "faiss_ivf_sq.nlist16384-fp16", + "name": "faiss_gpu_ivf_sq.nlist16384-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 16384, "quantizer_type": "fp16" }, - "file": "index/glove-100-angular/faiss_ivf_sq/nlist16384-fp16", + "file": "index/glove-100-angular/faiss_gpu_ivf_sq/nlist16384-fp16", "search_params": [ { "nprobe": 1 @@ -526,16 +526,16 @@ "nprobe": 2000 } ], - "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist16384-fp16" + "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_sq/nlist16384-fp16" }, { - "name": "faiss_ivf_sq.nlist1024-int8", + "name": "faiss_gpu_ivf_sq.nlist1024-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 1024, "quantizer_type": "int8" }, - "file": "index/glove-100-angular/faiss_ivf_sq/nlist1024-int8", + "file": "index/glove-100-angular/faiss_gpu_ivf_sq/nlist1024-int8", "search_params": [ { "nprobe": 1 @@ -562,16 +562,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist1024-int8" + "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_sq/nlist1024-int8" }, { - "name": "faiss_ivf_sq.nlist2048-int8", + "name": "faiss_gpu_ivf_sq.nlist2048-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 2048, "quantizer_type": "int8" }, - "file": "index/glove-100-angular/faiss_ivf_sq/nlist2048-int8", + "file": "index/glove-100-angular/faiss_gpu_ivf_sq/nlist2048-int8", "search_params": [ { "nprobe": 1 @@ -598,16 +598,16 @@ "nprobe": 1000 } ], 
- "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist2048-int8" + "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_sq/nlist2048-int8" }, { - "name": "faiss_ivf_sq.nlist4096-int8", + "name": "faiss_gpu_ivf_sq.nlist4096-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 4096, "quantizer_type": "int8" }, - "file": "index/glove-100-angular/faiss_ivf_sq/nlist4096-int8", + "file": "index/glove-100-angular/faiss_gpu_ivf_sq/nlist4096-int8", "search_params": [ { "nprobe": 1 @@ -634,16 +634,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist4096-int8" + "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_sq/nlist4096-int8" }, { - "name": "faiss_ivf_sq.nlist8192-int8", + "name": "faiss_gpu_ivf_sq.nlist8192-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 8192, "quantizer_type": "int8" }, - "file": "index/glove-100-angular/faiss_ivf_sq/nlist8192-int8", + "file": "index/glove-100-angular/faiss_gpu_ivf_sq/nlist8192-int8", "search_params": [ { "nprobe": 1 @@ -670,16 +670,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist8192-int8" + "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_sq/nlist8192-int8" }, { - "name": "faiss_ivf_sq.nlist16384-int8", + "name": "faiss_gpu_ivf_sq.nlist16384-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 16384, "quantizer_type": "int8" }, - "file": "index/glove-100-angular/faiss_ivf_sq/nlist16384-int8", + "file": "index/glove-100-angular/faiss_gpu_ivf_sq/nlist16384-int8", "search_params": [ { "nprobe": 1 @@ -709,17 +709,17 @@ "nprobe": 2000 } ], - "search_result_file": "result/glove-100-angular/faiss_ivf_sq/nlist16384-int8" + "search_result_file": "result/glove-100-angular/faiss_gpu_ivf_sq/nlist16384-int8" }, { - "name": "faiss_flat", + "name": "faiss_gpu_flat", "algo": "faiss_gpu_flat", "build_param": {}, - "file": "index/glove-100-angular/faiss_flat/flat", + "file": 
"index/glove-100-angular/faiss_gpu_flat/flat", "search_params": [ {} ], - "search_result_file": "result/glove-100-angular/faiss_flat/flat" + "search_result_file": "result/glove-100-angular/faiss_gpu_flat/flat" }, { diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-inner.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-inner.json index 7c95ceb439..a59496507e 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-inner.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-100-inner.json @@ -167,10 +167,10 @@ ] }, { - "name": "faiss_ivf_flat.nlist1024", + "name": "faiss_gpu_ivf_flat.nlist1024", "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":1024}, - "file": "glove-100-inner/faiss_ivf_flat/nlist1024", + "file": "glove-100-inner/faiss_gpu_ivf_flat/nlist1024", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -183,10 +183,10 @@ ] }, { - "name": "faiss_ivf_flat.nlist2048", + "name": "faiss_gpu_ivf_flat.nlist2048", "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":2048}, - "file": "glove-100-inner/faiss_ivf_flat/nlist2048", + "file": "glove-100-inner/faiss_gpu_ivf_flat/nlist2048", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -199,10 +199,10 @@ ] }, { - "name": "faiss_ivf_flat.nlist4096", + "name": "faiss_gpu_ivf_flat.nlist4096", "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":4096}, - "file": "glove-100-inner/faiss_ivf_flat/nlist4096", + "file": "glove-100-inner/faiss_gpu_ivf_flat/nlist4096", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -215,10 +215,10 @@ ] }, { - "name": "faiss_ivf_flat.nlist8192", + "name": "faiss_gpu_ivf_flat.nlist8192", "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist":8192}, - "file": "glove-100-inner/faiss_ivf_flat/nlist8192", + "file": "glove-100-inner/faiss_gpu_ivf_flat/nlist8192", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -231,10 +231,10 @@ ] }, { - "name": "faiss_ivf_flat.nlist16384", + "name": "faiss_gpu_ivf_flat.nlist16384", "algo": 
"faiss_gpu_ivf_flat", "build_param": {"nlist":16384}, - "file": "glove-100-inner/faiss_ivf_flat/nlist16384", + "file": "glove-100-inner/faiss_gpu_ivf_flat/nlist16384", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -251,10 +251,10 @@ { - "name": "faiss_ivf_pq.M2-nlist1024", + "name": "faiss_gpu_ivf_pq.M2-nlist1024", "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":1024, "M":2}, - "file": "glove-100-inner/faiss_ivf_pq/M2-nlist1024", + "file": "glove-100-inner/faiss_gpu_ivf_pq/M2-nlist1024", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -267,10 +267,10 @@ ] }, { - "name": "faiss_ivf_pq.M2-nlist2048", + "name": "faiss_gpu_ivf_pq.M2-nlist2048", "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":2048, "M":2}, - "file": "glove-100-inner/faiss_ivf_pq/M2-nlist2048", + "file": "glove-100-inner/faiss_gpu_ivf_pq/M2-nlist2048", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -283,10 +283,10 @@ ] }, { - "name": "faiss_ivf_pq.M2-nlist4096", + "name": "faiss_gpu_ivf_pq.M2-nlist4096", "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":4096, "M":2}, - "file": "glove-100-inner/faiss_ivf_pq/M2-nlist4096", + "file": "glove-100-inner/faiss_gpu_ivf_pq/M2-nlist4096", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -299,10 +299,10 @@ ] }, { - "name": "faiss_ivf_pq.M2-nlist8192", + "name": "faiss_gpu_ivf_pq.M2-nlist8192", "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":8192, "M":2}, - "file": "glove-100-inner/faiss_ivf_pq/M2-nlist8192", + "file": "glove-100-inner/faiss_gpu_ivf_pq/M2-nlist8192", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -315,10 +315,10 @@ ] }, { - "name": "faiss_ivf_pq.M2-nlist16384", + "name": "faiss_gpu_ivf_pq.M2-nlist16384", "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":16384, "M":2}, - "file": "glove-100-inner/faiss_ivf_pq/M2-nlist16384", + "file": "glove-100-inner/faiss_gpu_ivf_pq/M2-nlist16384", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -332,10 +332,10 @@ ] }, { - "name": "faiss_ivf_pq.M4-nlist1024", + "name": 
"faiss_gpu_ivf_pq.M4-nlist1024", "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":1024, "M":4}, - "file": "glove-100-inner/faiss_ivf_pq/M4-nlist1024", + "file": "glove-100-inner/faiss_gpu_ivf_pq/M4-nlist1024", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -348,10 +348,10 @@ ] }, { - "name": "faiss_ivf_pq.M4-nlist2048", + "name": "faiss_gpu_ivf_pq.M4-nlist2048", "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":2048, "M":4}, - "file": "glove-100-inner/faiss_ivf_pq/M4-nlist2048", + "file": "glove-100-inner/faiss_gpu_ivf_pq/M4-nlist2048", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -364,10 +364,10 @@ ] }, { - "name": "faiss_ivf_pq.M4-nlist4096", + "name": "faiss_gpu_ivf_pq.M4-nlist4096", "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":4096, "M":4}, - "file": "glove-100-inner/faiss_ivf_pq/M4-nlist4096", + "file": "glove-100-inner/faiss_gpu_ivf_pq/M4-nlist4096", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -380,10 +380,10 @@ ] }, { - "name": "faiss_ivf_pq.M4-nlist8192", + "name": "faiss_gpu_ivf_pq.M4-nlist8192", "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":8192, "M":4}, - "file": "glove-100-inner/faiss_ivf_pq/M4-nlist8192", + "file": "glove-100-inner/faiss_gpu_ivf_pq/M4-nlist8192", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -396,10 +396,10 @@ ] }, { - "name": "faiss_ivf_pq.M4-nlist16384", + "name": "faiss_gpu_ivf_pq.M4-nlist16384", "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":16384, "M":4}, - "file": "glove-100-inner/faiss_ivf_pq/M4-nlist16384", + "file": "glove-100-inner/faiss_gpu_ivf_pq/M4-nlist16384", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -413,10 +413,10 @@ ] }, { - "name": "faiss_ivf_pq.M20-nlist1024", + "name": "faiss_gpu_ivf_pq.M20-nlist1024", "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":1024, "M":20}, - "file": "glove-100-inner/faiss_ivf_pq/M20-nlist1024", + "file": "glove-100-inner/faiss_gpu_ivf_pq/M20-nlist1024", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -429,10 +429,10 @@ ] }, { - "name": 
"faiss_ivf_pq.M20-nlist2048", + "name": "faiss_gpu_ivf_pq.M20-nlist2048", "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":2048, "M":20}, - "file": "glove-100-inner/faiss_ivf_pq/M20-nlist2048", + "file": "glove-100-inner/faiss_gpu_ivf_pq/M20-nlist2048", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -445,10 +445,10 @@ ] }, { - "name": "faiss_ivf_pq.M20-nlist4096", + "name": "faiss_gpu_ivf_pq.M20-nlist4096", "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":4096, "M":20}, - "file": "glove-100-inner/faiss_ivf_pq/M20-nlist4096", + "file": "glove-100-inner/faiss_gpu_ivf_pq/M20-nlist4096", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -461,10 +461,10 @@ ] }, { - "name": "faiss_ivf_pq.M20-nlist8192", + "name": "faiss_gpu_ivf_pq.M20-nlist8192", "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":8192, "M":20}, - "file": "glove-100-inner/faiss_ivf_pq/M20-nlist8192", + "file": "glove-100-inner/faiss_gpu_ivf_pq/M20-nlist8192", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -477,10 +477,10 @@ ] }, { - "name": "faiss_ivf_pq.M20-nlist16384", + "name": "faiss_gpu_ivf_pq.M20-nlist16384", "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist":16384, "M":20}, - "file": "glove-100-inner/faiss_ivf_pq/M20-nlist16384", + "file": "glove-100-inner/faiss_gpu_ivf_pq/M20-nlist16384", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -496,10 +496,10 @@ { - "name": "faiss_ivf_sq.nlist1024-fp16", + "name": "faiss_gpu_ivf_sq.nlist1024-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":1024, "quantizer_type":"fp16"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist1024-fp16", + "file": "glove-100-inner/faiss_gpu_ivf_sq/nlist1024-fp16", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -512,10 +512,10 @@ ] }, { - "name": "faiss_ivf_sq.nlist2048-fp16", + "name": "faiss_gpu_ivf_sq.nlist2048-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":2048, "quantizer_type":"fp16"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist2048-fp16", + "file": 
"glove-100-inner/faiss_gpu_ivf_sq/nlist2048-fp16", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -528,10 +528,10 @@ ] }, { - "name": "faiss_ivf_sq.nlist4096-fp16", + "name": "faiss_gpu_ivf_sq.nlist4096-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":4096, "quantizer_type":"fp16"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist4096-fp16", + "file": "glove-100-inner/faiss_gpu_ivf_sq/nlist4096-fp16", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -544,10 +544,10 @@ ] }, { - "name": "faiss_ivf_sq.nlist8192-fp16", + "name": "faiss_gpu_ivf_sq.nlist8192-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":8192, "quantizer_type":"fp16"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist8192-fp16", + "file": "glove-100-inner/faiss_gpu_ivf_sq/nlist8192-fp16", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -560,10 +560,10 @@ ] }, { - "name": "faiss_ivf_sq.nlist16384-fp16", + "name": "faiss_gpu_ivf_sq.nlist16384-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":16384, "quantizer_type":"fp16"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist16384-fp16", + "file": "glove-100-inner/faiss_gpu_ivf_sq/nlist16384-fp16", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -577,10 +577,10 @@ ] }, { - "name": "faiss_ivf_sq.nlist1024-int8", + "name": "faiss_gpu_ivf_sq.nlist1024-int8", "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":1024, "quantizer_type":"int8"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist1024-int8", + "file": "glove-100-inner/faiss_gpu_ivf_sq/nlist1024-int8", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -593,10 +593,10 @@ ] }, { - "name": "faiss_ivf_sq.nlist2048-int8", + "name": "faiss_gpu_ivf_sq.nlist2048-int8", "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":2048, "quantizer_type":"int8"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist2048-int8", + "file": "glove-100-inner/faiss_gpu_ivf_sq/nlist2048-int8", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -609,10 +609,10 @@ ] }, { - "name": "faiss_ivf_sq.nlist4096-int8", + 
"name": "faiss_gpu_ivf_sq.nlist4096-int8", "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":4096, "quantizer_type":"int8"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist4096-int8", + "file": "glove-100-inner/faiss_gpu_ivf_sq/nlist4096-int8", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -625,10 +625,10 @@ ] }, { - "name": "faiss_ivf_sq.nlist8192-int8", + "name": "faiss_gpu_ivf_sq.nlist8192-int8", "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":8192, "quantizer_type":"int8"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist8192-int8", + "file": "glove-100-inner/faiss_gpu_ivf_sq/nlist8192-int8", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -641,10 +641,10 @@ ] }, { - "name": "faiss_ivf_sq.nlist16384-int8", + "name": "faiss_gpu_ivf_sq.nlist16384-int8", "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist":16384, "quantizer_type":"int8"}, - "file": "glove-100-inner/faiss_ivf_sq/nlist16384-int8", + "file": "glove-100-inner/faiss_gpu_ivf_sq/nlist16384-int8", "search_params": [ {"nprobe":1}, {"nprobe":5}, @@ -658,10 +658,10 @@ ] }, { - "name": "faiss_flat", + "name": "faiss_gpu_flat", "algo": "faiss_gpu_flat", "build_param": {}, - "file": "glove-100-inner/faiss_flat/flat", + "file": "glove-100-inner/faiss_gpu_flat/flat", "search_params": [{}] }, { diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-angular.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-angular.json index 9b3f192c9f..6e8d2fe1d9 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-angular.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/glove-50-angular.json @@ -104,12 +104,12 @@ "search_result_file": "result/glove-50-angular/raft_bfknn/bfknn" }, { - "name": "faiss_ivf_flat.nlist1024", + "name": "faiss_gpu_ivf_flat.nlist1024", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 1024 }, - "file": "index/glove-50-angular/faiss_ivf_flat/nlist1024", + "file": "index/glove-50-angular/faiss_gpu_ivf_flat/nlist1024", 
"search_params": [ { "nprobe": 1 @@ -136,15 +136,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-50-angular/faiss_ivf_flat/nlist1024" + "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_flat/nlist1024" }, { - "name": "faiss_ivf_flat.nlist2048", + "name": "faiss_gpu_ivf_flat.nlist2048", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 2048 }, - "file": "index/glove-50-angular/faiss_ivf_flat/nlist2048", + "file": "index/glove-50-angular/faiss_gpu_ivf_flat/nlist2048", "search_params": [ { "nprobe": 1 @@ -171,15 +171,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-50-angular/faiss_ivf_flat/nlist2048" + "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_flat/nlist2048" }, { - "name": "faiss_ivf_flat.nlist4096", + "name": "faiss_gpu_ivf_flat.nlist4096", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 4096 }, - "file": "index/glove-50-angular/faiss_ivf_flat/nlist4096", + "file": "index/glove-50-angular/faiss_gpu_ivf_flat/nlist4096", "search_params": [ { "nprobe": 1 @@ -206,15 +206,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-50-angular/faiss_ivf_flat/nlist4096" + "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_flat/nlist4096" }, { - "name": "faiss_ivf_flat.nlist8192", + "name": "faiss_gpu_ivf_flat.nlist8192", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 8192 }, - "file": "index/glove-50-angular/faiss_ivf_flat/nlist8192", + "file": "index/glove-50-angular/faiss_gpu_ivf_flat/nlist8192", "search_params": [ { "nprobe": 1 @@ -241,15 +241,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-50-angular/faiss_ivf_flat/nlist8192" + "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_flat/nlist8192" }, { - "name": "faiss_ivf_flat.nlist16384", + "name": "faiss_gpu_ivf_flat.nlist16384", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 16384 }, - "file": "index/glove-50-angular/faiss_ivf_flat/nlist16384", + "file": 
"index/glove-50-angular/faiss_gpu_ivf_flat/nlist16384", "search_params": [ { "nprobe": 1 @@ -279,10 +279,10 @@ "nprobe": 2000 } ], - "search_result_file": "result/glove-50-angular/faiss_ivf_flat/nlist16384" + "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_flat/nlist16384" }, { - "name": "faiss_ivf_pq.M64-nlist1024", + "name": "faiss_gpu_ivf_pq.M64-nlist1024", "algo": "faiss_gpu_ivf_pq", "build_param": { "nlist": 1024, @@ -290,7 +290,7 @@ "useFloat16": true, "usePrecomputed": true }, - "file": "index/glove-50-angular/faiss_ivf_pq/M64-nlist1024", + "file": "index/glove-50-angular/faiss_gpu_ivf_pq/M64-nlist1024", "search_params": [ { "nprobe": 10 @@ -311,10 +311,10 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-50-angular/faiss_ivf_pq/M64-nlist1024" + "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_pq/M64-nlist1024" }, { - "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", + "name": "faiss_gpu_ivf_pq.M64-nlist1024.noprecomp", "algo": "faiss_gpu_ivf_pq", "build_param": { "nlist": 1024, @@ -322,7 +322,7 @@ "useFloat16": true, "usePrecomputed": false }, - "file": "index/glove-50-angular/faiss_ivf_pq/M64-nlist1024.noprecomp", + "file": "index/glove-50-angular/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp", "search_params": [ { "nprobe": 10 @@ -343,16 +343,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-50-angular/faiss_ivf_pq/M64-nlist1024" + "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_pq/M64-nlist1024" }, { - "name": "faiss_ivf_sq.nlist1024-fp16", + "name": "faiss_gpu_ivf_sq.nlist1024-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 1024, "quantizer_type": "fp16" }, - "file": "index/glove-50-angular/faiss_ivf_sq/nlist1024-fp16", + "file": "index/glove-50-angular/faiss_gpu_ivf_sq/nlist1024-fp16", "search_params": [ { "nprobe": 1 @@ -379,16 +379,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist1024-fp16" + "search_result_file": 
"result/glove-50-angular/faiss_gpu_ivf_sq/nlist1024-fp16" }, { - "name": "faiss_ivf_sq.nlist2048-fp16", + "name": "faiss_gpu_ivf_sq.nlist2048-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 2048, "quantizer_type": "fp16" }, - "file": "index/glove-50-angular/faiss_ivf_sq/nlist2048-fp16", + "file": "index/glove-50-angular/faiss_gpu_ivf_sq/nlist2048-fp16", "search_params": [ { "nprobe": 1 @@ -415,16 +415,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist2048-fp16" + "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_sq/nlist2048-fp16" }, { - "name": "faiss_ivf_sq.nlist4096-fp16", + "name": "faiss_gpu_ivf_sq.nlist4096-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 4096, "quantizer_type": "fp16" }, - "file": "index/glove-50-angular/faiss_ivf_sq/nlist4096-fp16", + "file": "index/glove-50-angular/faiss_gpu_ivf_sq/nlist4096-fp16", "search_params": [ { "nprobe": 1 @@ -451,16 +451,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist4096-fp16" + "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_sq/nlist4096-fp16" }, { - "name": "faiss_ivf_sq.nlist8192-fp16", + "name": "faiss_gpu_ivf_sq.nlist8192-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 8192, "quantizer_type": "fp16" }, - "file": "index/glove-50-angular/faiss_ivf_sq/nlist8192-fp16", + "file": "index/glove-50-angular/faiss_gpu_ivf_sq/nlist8192-fp16", "search_params": [ { "nprobe": 1 @@ -487,16 +487,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist8192-fp16" + "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_sq/nlist8192-fp16" }, { - "name": "faiss_ivf_sq.nlist16384-fp16", + "name": "faiss_gpu_ivf_sq.nlist16384-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 16384, "quantizer_type": "fp16" }, - "file": "index/glove-50-angular/faiss_ivf_sq/nlist16384-fp16", + "file": 
"index/glove-50-angular/faiss_gpu_ivf_sq/nlist16384-fp16", "search_params": [ { "nprobe": 1 @@ -526,16 +526,16 @@ "nprobe": 2000 } ], - "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist16384-fp16" + "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_sq/nlist16384-fp16" }, { - "name": "faiss_ivf_sq.nlist1024-int8", + "name": "faiss_gpu_ivf_sq.nlist1024-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 1024, "quantizer_type": "int8" }, - "file": "index/glove-50-angular/faiss_ivf_sq/nlist1024-int8", + "file": "index/glove-50-angular/faiss_gpu_ivf_sq/nlist1024-int8", "search_params": [ { "nprobe": 1 @@ -562,16 +562,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist1024-int8" + "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_sq/nlist1024-int8" }, { - "name": "faiss_ivf_sq.nlist2048-int8", + "name": "faiss_gpu_ivf_sq.nlist2048-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 2048, "quantizer_type": "int8" }, - "file": "index/glove-50-angular/faiss_ivf_sq/nlist2048-int8", + "file": "index/glove-50-angular/faiss_gpu_ivf_sq/nlist2048-int8", "search_params": [ { "nprobe": 1 @@ -598,16 +598,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist2048-int8" + "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_sq/nlist2048-int8" }, { - "name": "faiss_ivf_sq.nlist4096-int8", + "name": "faiss_gpu_ivf_sq.nlist4096-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 4096, "quantizer_type": "int8" }, - "file": "index/glove-50-angular/faiss_ivf_sq/nlist4096-int8", + "file": "index/glove-50-angular/faiss_gpu_ivf_sq/nlist4096-int8", "search_params": [ { "nprobe": 1 @@ -634,16 +634,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist4096-int8" + "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_sq/nlist4096-int8" }, { - "name": "faiss_ivf_sq.nlist8192-int8", + "name": 
"faiss_gpu_ivf_sq.nlist8192-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 8192, "quantizer_type": "int8" }, - "file": "index/glove-50-angular/faiss_ivf_sq/nlist8192-int8", + "file": "index/glove-50-angular/faiss_gpu_ivf_sq/nlist8192-int8", "search_params": [ { "nprobe": 1 @@ -670,16 +670,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist8192-int8" + "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_sq/nlist8192-int8" }, { - "name": "faiss_ivf_sq.nlist16384-int8", + "name": "faiss_gpu_ivf_sq.nlist16384-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 16384, "quantizer_type": "int8" }, - "file": "index/glove-50-angular/faiss_ivf_sq/nlist16384-int8", + "file": "index/glove-50-angular/faiss_gpu_ivf_sq/nlist16384-int8", "search_params": [ { "nprobe": 1 @@ -709,17 +709,17 @@ "nprobe": 2000 } ], - "search_result_file": "result/glove-50-angular/faiss_ivf_sq/nlist16384-int8" + "search_result_file": "result/glove-50-angular/faiss_gpu_ivf_sq/nlist16384-int8" }, { - "name": "faiss_flat", + "name": "faiss_gpu_flat", "algo": "faiss_gpu_flat", "build_param": {}, - "file": "index/glove-50-angular/faiss_flat/flat", + "file": "index/glove-50-angular/faiss_gpu_flat/flat", "search_params": [ {} ], - "search_result_file": "result/glove-50-angular/faiss_flat/flat" + "search_result_file": "result/glove-50-angular/faiss_gpu_flat/flat" }, { diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/lastfm-65-angular.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/lastfm-65-angular.json index e5a4ca6e5f..2d7a2eb7d4 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/lastfm-65-angular.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/lastfm-65-angular.json @@ -104,12 +104,12 @@ "search_result_file": "result/lastfm-65-angular/raft_bfknn/bfknn" }, { - "name": "faiss_ivf_flat.nlist1024", + "name": "faiss_gpu_ivf_flat.nlist1024", "algo": "faiss_gpu_ivf_flat", "build_param": { 
"nlist": 1024 }, - "file": "index/lastfm-65-angular/faiss_ivf_flat/nlist1024", + "file": "index/lastfm-65-angular/faiss_gpu_ivf_flat/nlist1024", "search_params": [ { "nprobe": 1 @@ -136,15 +136,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_flat/nlist1024" + "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_flat/nlist1024" }, { - "name": "faiss_ivf_flat.nlist2048", + "name": "faiss_gpu_ivf_flat.nlist2048", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 2048 }, - "file": "index/lastfm-65-angular/faiss_ivf_flat/nlist2048", + "file": "index/lastfm-65-angular/faiss_gpu_ivf_flat/nlist2048", "search_params": [ { "nprobe": 1 @@ -171,15 +171,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_flat/nlist2048" + "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_flat/nlist2048" }, { - "name": "faiss_ivf_flat.nlist4096", + "name": "faiss_gpu_ivf_flat.nlist4096", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 4096 }, - "file": "index/lastfm-65-angular/faiss_ivf_flat/nlist4096", + "file": "index/lastfm-65-angular/faiss_gpu_ivf_flat/nlist4096", "search_params": [ { "nprobe": 1 @@ -206,15 +206,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_flat/nlist4096" + "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_flat/nlist4096" }, { - "name": "faiss_ivf_flat.nlist8192", + "name": "faiss_gpu_ivf_flat.nlist8192", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 8192 }, - "file": "index/lastfm-65-angular/faiss_ivf_flat/nlist8192", + "file": "index/lastfm-65-angular/faiss_gpu_ivf_flat/nlist8192", "search_params": [ { "nprobe": 1 @@ -241,15 +241,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_flat/nlist8192" + "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_flat/nlist8192" }, { - "name": "faiss_ivf_flat.nlist16384", + "name": "faiss_gpu_ivf_flat.nlist16384", "algo": 
"faiss_gpu_ivf_flat", "build_param": { "nlist": 16384 }, - "file": "index/lastfm-65-angular/faiss_ivf_flat/nlist16384", + "file": "index/lastfm-65-angular/faiss_gpu_ivf_flat/nlist16384", "search_params": [ { "nprobe": 1 @@ -279,10 +279,10 @@ "nprobe": 2000 } ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_flat/nlist16384" + "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_flat/nlist16384" }, { - "name": "faiss_ivf_pq.M64-nlist1024", + "name": "faiss_gpu_ivf_pq.M64-nlist1024", "algo": "faiss_gpu_ivf_pq", "build_param": { "nlist": 1024, @@ -290,7 +290,7 @@ "useFloat16": true, "usePrecomputed": true }, - "file": "index/lastfm-65-angular/faiss_ivf_pq/M64-nlist1024", + "file": "index/lastfm-65-angular/faiss_gpu_ivf_pq/M64-nlist1024", "search_params": [ { "nprobe": 10 @@ -311,10 +311,10 @@ "nprobe": 1000 } ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_pq/M64-nlist1024" + "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_pq/M64-nlist1024" }, { - "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", + "name": "faiss_gpu_ivf_pq.M64-nlist1024.noprecomp", "algo": "faiss_gpu_ivf_pq", "build_param": { "nlist": 1024, @@ -322,7 +322,7 @@ "useFloat16": true, "usePrecomputed": false }, - "file": "index/lastfm-65-angular/faiss_ivf_pq/M64-nlist1024.noprecomp", + "file": "index/lastfm-65-angular/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp", "search_params": [ { "nprobe": 10 @@ -343,16 +343,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_pq/M64-nlist1024" + "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_pq/M64-nlist1024" }, { - "name": "faiss_ivf_sq.nlist1024-fp16", + "name": "faiss_gpu_ivf_sq.nlist1024-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 1024, "quantizer_type": "fp16" }, - "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist1024-fp16", + "file": "index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist1024-fp16", "search_params": [ { "nprobe": 1 @@ -379,16 +379,16 @@ 
"nprobe": 1000 } ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist1024-fp16" + "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist1024-fp16" }, { - "name": "faiss_ivf_sq.nlist2048-fp16", + "name": "faiss_gpu_ivf_sq.nlist2048-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 2048, "quantizer_type": "fp16" }, - "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist2048-fp16", + "file": "index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist2048-fp16", "search_params": [ { "nprobe": 1 @@ -415,16 +415,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist2048-fp16" + "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist2048-fp16" }, { - "name": "faiss_ivf_sq.nlist4096-fp16", + "name": "faiss_gpu_ivf_sq.nlist4096-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 4096, "quantizer_type": "fp16" }, - "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist4096-fp16", + "file": "index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist4096-fp16", "search_params": [ { "nprobe": 1 @@ -451,16 +451,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist4096-fp16" + "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist4096-fp16" }, { - "name": "faiss_ivf_sq.nlist8192-fp16", + "name": "faiss_gpu_ivf_sq.nlist8192-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 8192, "quantizer_type": "fp16" }, - "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist8192-fp16", + "file": "index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist8192-fp16", "search_params": [ { "nprobe": 1 @@ -487,16 +487,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist8192-fp16" + "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist8192-fp16" }, { - "name": "faiss_ivf_sq.nlist16384-fp16", + "name": "faiss_gpu_ivf_sq.nlist16384-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 16384, 
"quantizer_type": "fp16" }, - "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist16384-fp16", + "file": "index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist16384-fp16", "search_params": [ { "nprobe": 1 @@ -526,16 +526,16 @@ "nprobe": 2000 } ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist16384-fp16" + "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist16384-fp16" }, { - "name": "faiss_ivf_sq.nlist1024-int8", + "name": "faiss_gpu_ivf_sq.nlist1024-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 1024, "quantizer_type": "int8" }, - "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist1024-int8", + "file": "index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist1024-int8", "search_params": [ { "nprobe": 1 @@ -562,16 +562,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist1024-int8" + "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist1024-int8" }, { - "name": "faiss_ivf_sq.nlist2048-int8", + "name": "faiss_gpu_ivf_sq.nlist2048-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 2048, "quantizer_type": "int8" }, - "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist2048-int8", + "file": "index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist2048-int8", "search_params": [ { "nprobe": 1 @@ -598,16 +598,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist2048-int8" + "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist2048-int8" }, { - "name": "faiss_ivf_sq.nlist4096-int8", + "name": "faiss_gpu_ivf_sq.nlist4096-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 4096, "quantizer_type": "int8" }, - "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist4096-int8", + "file": "index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist4096-int8", "search_params": [ { "nprobe": 1 @@ -634,16 +634,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist4096-int8" + "search_result_file": 
"result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist4096-int8" }, { - "name": "faiss_ivf_sq.nlist8192-int8", + "name": "faiss_gpu_ivf_sq.nlist8192-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 8192, "quantizer_type": "int8" }, - "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist8192-int8", + "file": "index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist8192-int8", "search_params": [ { "nprobe": 1 @@ -670,16 +670,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist8192-int8" + "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist8192-int8" }, { - "name": "faiss_ivf_sq.nlist16384-int8", + "name": "faiss_gpu_ivf_sq.nlist16384-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 16384, "quantizer_type": "int8" }, - "file": "index/lastfm-65-angular/faiss_ivf_sq/nlist16384-int8", + "file": "index/lastfm-65-angular/faiss_gpu_ivf_sq/nlist16384-int8", "search_params": [ { "nprobe": 1 @@ -709,17 +709,17 @@ "nprobe": 2000 } ], - "search_result_file": "result/lastfm-65-angular/faiss_ivf_sq/nlist16384-int8" + "search_result_file": "result/lastfm-65-angular/faiss_gpu_ivf_sq/nlist16384-int8" }, { - "name": "faiss_flat", + "name": "faiss_gpu_flat", "algo": "faiss_gpu_flat", "build_param": {}, - "file": "index/lastfm-65-angular/faiss_flat/flat", + "file": "index/lastfm-65-angular/faiss_gpu_flat/flat", "search_params": [ {} ], - "search_result_file": "result/lastfm-65-angular/faiss_flat/flat" + "search_result_file": "result/lastfm-65-angular/faiss_gpu_flat/flat" }, { diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/mnist-784-euclidean.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/mnist-784-euclidean.json index 2a493edeed..66750a64b1 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/mnist-784-euclidean.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/mnist-784-euclidean.json @@ -104,12 +104,12 @@ "search_result_file": "result/mnist-784-euclidean/raft_bfknn/bfknn" 
}, { - "name": "faiss_ivf_flat.nlist1024", + "name": "faiss_gpu_ivf_flat.nlist1024", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 1024 }, - "file": "index/mnist-784-euclidean/faiss_ivf_flat/nlist1024", + "file": "index/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist1024", "search_params": [ { "nprobe": 1 @@ -136,15 +136,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_flat/nlist1024" + "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist1024" }, { - "name": "faiss_ivf_flat.nlist2048", + "name": "faiss_gpu_ivf_flat.nlist2048", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 2048 }, - "file": "index/mnist-784-euclidean/faiss_ivf_flat/nlist2048", + "file": "index/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist2048", "search_params": [ { "nprobe": 1 @@ -171,15 +171,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_flat/nlist2048" + "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist2048" }, { - "name": "faiss_ivf_flat.nlist4096", + "name": "faiss_gpu_ivf_flat.nlist4096", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 4096 }, - "file": "index/mnist-784-euclidean/faiss_ivf_flat/nlist4096", + "file": "index/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist4096", "search_params": [ { "nprobe": 1 @@ -206,15 +206,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_flat/nlist4096" + "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist4096" }, { - "name": "faiss_ivf_flat.nlist8192", + "name": "faiss_gpu_ivf_flat.nlist8192", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 8192 }, - "file": "index/mnist-784-euclidean/faiss_ivf_flat/nlist8192", + "file": "index/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist8192", "search_params": [ { "nprobe": 1 @@ -241,15 +241,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_flat/nlist8192" + 
"search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist8192" }, { - "name": "faiss_ivf_flat.nlist16384", + "name": "faiss_gpu_ivf_flat.nlist16384", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 16384 }, - "file": "index/mnist-784-euclidean/faiss_ivf_flat/nlist16384", + "file": "index/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist16384", "search_params": [ { "nprobe": 1 @@ -279,10 +279,10 @@ "nprobe": 2000 } ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_flat/nlist16384" + "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_flat/nlist16384" }, { - "name": "faiss_ivf_pq.M64-nlist1024", + "name": "faiss_gpu_ivf_pq.M64-nlist1024", "algo": "faiss_gpu_ivf_pq", "build_param": { "nlist": 1024, @@ -290,7 +290,7 @@ "useFloat16": true, "usePrecomputed": true }, - "file": "index/mnist-784-euclidean/faiss_ivf_pq/M64-nlist1024", + "file": "index/mnist-784-euclidean/faiss_gpu_ivf_pq/M64-nlist1024", "search_params": [ { "nprobe": 10 @@ -311,10 +311,10 @@ "nprobe": 1000 } ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_pq/M64-nlist1024" + "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_pq/M64-nlist1024" }, { - "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", + "name": "faiss_gpu_ivf_pq.M64-nlist1024.noprecomp", "algo": "faiss_gpu_ivf_pq", "build_param": { "nlist": 1024, @@ -322,7 +322,7 @@ "useFloat16": true, "usePrecomputed": false }, - "file": "index/mnist-784-euclidean/faiss_ivf_pq/M64-nlist1024.noprecomp", + "file": "index/mnist-784-euclidean/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp", "search_params": [ { "nprobe": 10 @@ -343,16 +343,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_pq/M64-nlist1024" + "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_pq/M64-nlist1024" }, { - "name": "faiss_ivf_sq.nlist1024-fp16", + "name": "faiss_gpu_ivf_sq.nlist1024-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 1024, "quantizer_type": 
"fp16" }, - "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist1024-fp16", + "file": "index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist1024-fp16", "search_params": [ { "nprobe": 1 @@ -379,16 +379,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist1024-fp16" + "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist1024-fp16" }, { - "name": "faiss_ivf_sq.nlist2048-fp16", + "name": "faiss_gpu_ivf_sq.nlist2048-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 2048, "quantizer_type": "fp16" }, - "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist2048-fp16", + "file": "index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist2048-fp16", "search_params": [ { "nprobe": 1 @@ -415,16 +415,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist2048-fp16" + "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist2048-fp16" }, { - "name": "faiss_ivf_sq.nlist4096-fp16", + "name": "faiss_gpu_ivf_sq.nlist4096-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 4096, "quantizer_type": "fp16" }, - "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist4096-fp16", + "file": "index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist4096-fp16", "search_params": [ { "nprobe": 1 @@ -451,16 +451,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist4096-fp16" + "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist4096-fp16" }, { - "name": "faiss_ivf_sq.nlist8192-fp16", + "name": "faiss_gpu_ivf_sq.nlist8192-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 8192, "quantizer_type": "fp16" }, - "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist8192-fp16", + "file": "index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist8192-fp16", "search_params": [ { "nprobe": 1 @@ -487,16 +487,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist8192-fp16" + 
"search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist8192-fp16" }, { - "name": "faiss_ivf_sq.nlist16384-fp16", + "name": "faiss_gpu_ivf_sq.nlist16384-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 16384, "quantizer_type": "fp16" }, - "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist16384-fp16", + "file": "index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist16384-fp16", "search_params": [ { "nprobe": 1 @@ -526,16 +526,16 @@ "nprobe": 2000 } ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist16384-fp16" + "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist16384-fp16" }, { - "name": "faiss_ivf_sq.nlist1024-int8", + "name": "faiss_gpu_ivf_sq.nlist1024-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 1024, "quantizer_type": "int8" }, - "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist1024-int8", + "file": "index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist1024-int8", "search_params": [ { "nprobe": 1 @@ -562,16 +562,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist1024-int8" + "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist1024-int8" }, { - "name": "faiss_ivf_sq.nlist2048-int8", + "name": "faiss_gpu_ivf_sq.nlist2048-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 2048, "quantizer_type": "int8" }, - "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist2048-int8", + "file": "index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist2048-int8", "search_params": [ { "nprobe": 1 @@ -598,16 +598,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist2048-int8" + "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist2048-int8" }, { - "name": "faiss_ivf_sq.nlist4096-int8", + "name": "faiss_gpu_ivf_sq.nlist4096-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 4096, "quantizer_type": "int8" }, - "file": 
"index/mnist-784-euclidean/faiss_ivf_sq/nlist4096-int8", + "file": "index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist4096-int8", "search_params": [ { "nprobe": 1 @@ -634,16 +634,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist4096-int8" + "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist4096-int8" }, { - "name": "faiss_ivf_sq.nlist8192-int8", + "name": "faiss_gpu_ivf_sq.nlist8192-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 8192, "quantizer_type": "int8" }, - "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist8192-int8", + "file": "index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist8192-int8", "search_params": [ { "nprobe": 1 @@ -670,16 +670,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist8192-int8" + "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist8192-int8" }, { - "name": "faiss_ivf_sq.nlist16384-int8", + "name": "faiss_gpu_ivf_sq.nlist16384-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 16384, "quantizer_type": "int8" }, - "file": "index/mnist-784-euclidean/faiss_ivf_sq/nlist16384-int8", + "file": "index/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist16384-int8", "search_params": [ { "nprobe": 1 @@ -709,17 +709,17 @@ "nprobe": 2000 } ], - "search_result_file": "result/mnist-784-euclidean/faiss_ivf_sq/nlist16384-int8" + "search_result_file": "result/mnist-784-euclidean/faiss_gpu_ivf_sq/nlist16384-int8" }, { - "name": "faiss_flat", + "name": "faiss_gpu_flat", "algo": "faiss_gpu_flat", "build_param": {}, - "file": "index/mnist-784-euclidean/faiss_flat/flat", + "file": "index/mnist-784-euclidean/faiss_gpu_flat/flat", "search_params": [ {} ], - "search_result_file": "result/mnist-784-euclidean/faiss_flat/flat" + "search_result_file": "result/mnist-784-euclidean/faiss_gpu_flat/flat" }, { diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-angular.json 
b/python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-angular.json index 630b700ba5..78c5e181e3 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-angular.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/nytimes-256-angular.json @@ -104,12 +104,12 @@ "search_result_file": "result/nytimes-256-angular/raft_bfknn/bfknn" }, { - "name": "faiss_ivf_flat.nlist1024", + "name": "faiss_gpu_ivf_flat.nlist1024", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 1024 }, - "file": "index/nytimes-256-angular/faiss_ivf_flat/nlist1024", + "file": "index/nytimes-256-angular/faiss_gpu_ivf_flat/nlist1024", "search_params": [ { "nprobe": 1 @@ -136,15 +136,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_flat/nlist1024" + "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_flat/nlist1024" }, { - "name": "faiss_ivf_flat.nlist2048", + "name": "faiss_gpu_ivf_flat.nlist2048", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 2048 }, - "file": "index/nytimes-256-angular/faiss_ivf_flat/nlist2048", + "file": "index/nytimes-256-angular/faiss_gpu_ivf_flat/nlist2048", "search_params": [ { "nprobe": 1 @@ -171,15 +171,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_flat/nlist2048" + "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_flat/nlist2048" }, { - "name": "faiss_ivf_flat.nlist4096", + "name": "faiss_gpu_ivf_flat.nlist4096", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 4096 }, - "file": "index/nytimes-256-angular/faiss_ivf_flat/nlist4096", + "file": "index/nytimes-256-angular/faiss_gpu_ivf_flat/nlist4096", "search_params": [ { "nprobe": 1 @@ -206,15 +206,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_flat/nlist4096" + "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_flat/nlist4096" }, { - "name": "faiss_ivf_flat.nlist8192", + "name": "faiss_gpu_ivf_flat.nlist8192", 
"algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 8192 }, - "file": "index/nytimes-256-angular/faiss_ivf_flat/nlist8192", + "file": "index/nytimes-256-angular/faiss_gpu_ivf_flat/nlist8192", "search_params": [ { "nprobe": 1 @@ -241,15 +241,15 @@ "nprobe": 1000 } ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_flat/nlist8192" + "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_flat/nlist8192" }, { - "name": "faiss_ivf_flat.nlist16384", + "name": "faiss_gpu_ivf_flat.nlist16384", "algo": "faiss_gpu_ivf_flat", "build_param": { "nlist": 16384 }, - "file": "index/nytimes-256-angular/faiss_ivf_flat/nlist16384", + "file": "index/nytimes-256-angular/faiss_gpu_ivf_flat/nlist16384", "search_params": [ { "nprobe": 1 @@ -279,10 +279,10 @@ "nprobe": 2000 } ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_flat/nlist16384" + "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_flat/nlist16384" }, { - "name": "faiss_ivf_pq.M64-nlist1024", + "name": "faiss_gpu_ivf_pq.M64-nlist1024", "algo": "faiss_gpu_ivf_pq", "build_param": { "nlist": 1024, @@ -290,7 +290,7 @@ "useFloat16": true, "usePrecomputed": true }, - "file": "index/nytimes-256-angular/faiss_ivf_pq/M64-nlist1024", + "file": "index/nytimes-256-angular/faiss_gpu_ivf_pq/M64-nlist1024", "search_params": [ { "nprobe": 10 @@ -311,10 +311,10 @@ "nprobe": 1000 } ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_pq/M64-nlist1024" + "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_pq/M64-nlist1024" }, { - "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", + "name": "faiss_gpu_ivf_pq.M64-nlist1024.noprecomp", "algo": "faiss_gpu_ivf_pq", "build_param": { "nlist": 1024, @@ -322,7 +322,7 @@ "useFloat16": true, "usePrecomputed": false }, - "file": "index/nytimes-256-angular/faiss_ivf_pq/M64-nlist1024.noprecomp", + "file": "index/nytimes-256-angular/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp", "search_params": [ { "nprobe": 10 @@ -343,16 +343,16 @@ 
"nprobe": 1000 } ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_pq/M64-nlist1024" + "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_pq/M64-nlist1024" }, { - "name": "faiss_ivf_sq.nlist1024-fp16", + "name": "faiss_gpu_ivf_sq.nlist1024-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 1024, "quantizer_type": "fp16" }, - "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist1024-fp16", + "file": "index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist1024-fp16", "search_params": [ { "nprobe": 1 @@ -379,16 +379,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist1024-fp16" + "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist1024-fp16" }, { - "name": "faiss_ivf_sq.nlist2048-fp16", + "name": "faiss_gpu_ivf_sq.nlist2048-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 2048, "quantizer_type": "fp16" }, - "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist2048-fp16", + "file": "index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist2048-fp16", "search_params": [ { "nprobe": 1 @@ -415,16 +415,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist2048-fp16" + "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist2048-fp16" }, { - "name": "faiss_ivf_sq.nlist4096-fp16", + "name": "faiss_gpu_ivf_sq.nlist4096-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 4096, "quantizer_type": "fp16" }, - "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist4096-fp16", + "file": "index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist4096-fp16", "search_params": [ { "nprobe": 1 @@ -451,16 +451,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist4096-fp16" + "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist4096-fp16" }, { - "name": "faiss_ivf_sq.nlist8192-fp16", + "name": "faiss_gpu_ivf_sq.nlist8192-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { 
"nlist": 8192, "quantizer_type": "fp16" }, - "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist8192-fp16", + "file": "index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist8192-fp16", "search_params": [ { "nprobe": 1 @@ -487,16 +487,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist8192-fp16" + "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist8192-fp16" }, { - "name": "faiss_ivf_sq.nlist16384-fp16", + "name": "faiss_gpu_ivf_sq.nlist16384-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 16384, "quantizer_type": "fp16" }, - "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist16384-fp16", + "file": "index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist16384-fp16", "search_params": [ { "nprobe": 1 @@ -526,16 +526,16 @@ "nprobe": 2000 } ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist16384-fp16" + "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist16384-fp16" }, { - "name": "faiss_ivf_sq.nlist1024-int8", + "name": "faiss_gpu_ivf_sq.nlist1024-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 1024, "quantizer_type": "int8" }, - "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist1024-int8", + "file": "index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist1024-int8", "search_params": [ { "nprobe": 1 @@ -562,16 +562,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist1024-int8" + "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist1024-int8" }, { - "name": "faiss_ivf_sq.nlist2048-int8", + "name": "faiss_gpu_ivf_sq.nlist2048-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 2048, "quantizer_type": "int8" }, - "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist2048-int8", + "file": "index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist2048-int8", "search_params": [ { "nprobe": 1 @@ -598,16 +598,16 @@ "nprobe": 1000 } ], - "search_result_file": 
"result/nytimes-256-angular/faiss_ivf_sq/nlist2048-int8" + "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist2048-int8" }, { - "name": "faiss_ivf_sq.nlist4096-int8", + "name": "faiss_gpu_ivf_sq.nlist4096-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 4096, "quantizer_type": "int8" }, - "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist4096-int8", + "file": "index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist4096-int8", "search_params": [ { "nprobe": 1 @@ -634,16 +634,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist4096-int8" + "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist4096-int8" }, { - "name": "faiss_ivf_sq.nlist8192-int8", + "name": "faiss_gpu_ivf_sq.nlist8192-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 8192, "quantizer_type": "int8" }, - "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist8192-int8", + "file": "index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist8192-int8", "search_params": [ { "nprobe": 1 @@ -670,16 +670,16 @@ "nprobe": 1000 } ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist8192-int8" + "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist8192-int8" }, { - "name": "faiss_ivf_sq.nlist16384-int8", + "name": "faiss_gpu_ivf_sq.nlist16384-int8", "algo": "faiss_gpu_ivf_sq", "build_param": { "nlist": 16384, "quantizer_type": "int8" }, - "file": "index/nytimes-256-angular/faiss_ivf_sq/nlist16384-int8", + "file": "index/nytimes-256-angular/faiss_gpu_ivf_sq/nlist16384-int8", "search_params": [ { "nprobe": 1 @@ -709,17 +709,17 @@ "nprobe": 2000 } ], - "search_result_file": "result/nytimes-256-angular/faiss_ivf_sq/nlist16384-int8" + "search_result_file": "result/nytimes-256-angular/faiss_gpu_ivf_sq/nlist16384-int8" }, { - "name": "faiss_flat", + "name": "faiss_gpu_flat", "algo": "faiss_gpu_flat", "build_param": {}, - "file": "index/nytimes-256-angular/faiss_flat/flat", + "file": 
"index/nytimes-256-angular/faiss_gpu_flat/flat", "search_params": [ {} ], - "search_result_file": "result/nytimes-256-angular/faiss_flat/flat" + "search_result_file": "result/nytimes-256-angular/faiss_gpu_flat/flat" }, { diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/sift-128-euclidean.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/sift-128-euclidean.json index 439c1a10c6..ff639cb90c 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/sift-128-euclidean.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/sift-128-euclidean.json @@ -93,10 +93,10 @@ "search_params": [{"probe": 1}] }, { - "name": "faiss_ivf_flat.nlist1024", + "name": "faiss_gpu_ivf_flat.nlist1024", "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist": 1024}, - "file": "sift-128-euclidean/faiss_ivf_flat/nlist1024", + "file": "sift-128-euclidean/faiss_gpu_ivf_flat/nlist1024", "search_params": [ {"nprobe": 1}, {"nprobe": 5}, @@ -109,10 +109,10 @@ ] }, { - "name": "faiss_ivf_flat.nlist2048", + "name": "faiss_gpu_ivf_flat.nlist2048", "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist": 2048}, - "file": "sift-128-euclidean/faiss_ivf_flat/nlist2048", + "file": "sift-128-euclidean/faiss_gpu_ivf_flat/nlist2048", "search_params": [ {"nprobe": 1}, {"nprobe": 5}, @@ -125,10 +125,10 @@ ] }, { - "name": "faiss_ivf_flat.nlist4096", + "name": "faiss_gpu_ivf_flat.nlist4096", "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist": 4096}, - "file": "sift-128-euclidean/faiss_ivf_flat/nlist4096", + "file": "sift-128-euclidean/faiss_gpu_ivf_flat/nlist4096", "search_params": [ {"nprobe": 1}, {"nprobe": 5}, @@ -141,10 +141,10 @@ ] }, { - "name": "faiss_ivf_flat.nlist8192", + "name": "faiss_gpu_ivf_flat.nlist8192", "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist": 8192}, - "file": "sift-128-euclidean/faiss_ivf_flat/nlist8192", + "file": "sift-128-euclidean/faiss_gpu_ivf_flat/nlist8192", "search_params": [ {"nprobe": 1}, {"nprobe": 5}, @@ -157,10 +157,10 @@ ] }, { - "name": 
"faiss_ivf_flat.nlist16384", + "name": "faiss_gpu_ivf_flat.nlist16384", "algo": "faiss_gpu_ivf_flat", "build_param": {"nlist": 16384}, - "file": "sift-128-euclidean/faiss_ivf_flat/nlist16384", + "file": "sift-128-euclidean/faiss_gpu_ivf_flat/nlist16384", "search_params": [ {"nprobe": 1}, {"nprobe": 5}, @@ -174,10 +174,10 @@ ] }, { - "name": "faiss_ivf_pq.M64-nlist1024", + "name": "faiss_gpu_ivf_pq.M64-nlist1024", "algo": "faiss_gpu_ivf_pq", "build_param": {"nlist": 1024, "M": 64, "useFloat16": true, "usePrecomputed": true}, - "file": "sift-128-euclidean/faiss_ivf_pq/M64-nlist1024", + "file": "sift-128-euclidean/faiss_gpu_ivf_pq/M64-nlist1024", "search_params": [ {"nprobe": 10}, {"nprobe": 50}, @@ -188,7 +188,7 @@ ] }, { - "name": "faiss_ivf_pq.M64-nlist1024.noprecomp", + "name": "faiss_gpu_ivf_pq.M64-nlist1024.noprecomp", "algo": "faiss_gpu_ivf_pq", "build_param": { "nlist": 1024, @@ -196,7 +196,7 @@ "useFloat16": true, "usePrecomputed": false }, - "file": "sift-128-euclidean/faiss_ivf_pq/M64-nlist1024.noprecomp", + "file": "sift-128-euclidean/faiss_gpu_ivf_pq/M64-nlist1024.noprecomp", "search_params": [ {"nprobe": 10}, {"nprobe": 50}, @@ -207,10 +207,10 @@ ] }, { - "name": "faiss_ivf_sq.nlist1024-fp16", + "name": "faiss_gpu_ivf_sq.nlist1024-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist": 1024, "quantizer_type": "fp16"}, - "file": "sift-128-euclidean/faiss_ivf_sq/nlist1024-fp16", + "file": "sift-128-euclidean/faiss_gpu_ivf_sq/nlist1024-fp16", "search_params": [ {"nprobe": 1}, {"nprobe": 5}, @@ -223,10 +223,10 @@ ] }, { - "name": "faiss_ivf_sq.nlist2048-fp16", + "name": "faiss_gpu_ivf_sq.nlist2048-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist": 2048, "quantizer_type": "fp16"}, - "file": "sift-128-euclidean/faiss_ivf_sq/nlist2048-fp16", + "file": "sift-128-euclidean/faiss_gpu_ivf_sq/nlist2048-fp16", "search_params": [ {"nprobe": 1}, {"nprobe": 5}, @@ -239,10 +239,10 @@ ] }, { - "name": "faiss_ivf_sq.nlist4096-fp16", + "name": 
"faiss_gpu_ivf_sq.nlist4096-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist": 4096, "quantizer_type": "fp16"}, - "file": "sift-128-euclidean/faiss_ivf_sq/nlist4096-fp16", + "file": "sift-128-euclidean/faiss_gpu_ivf_sq/nlist4096-fp16", "search_params": [ {"nprobe": 1}, {"nprobe": 5}, @@ -255,10 +255,10 @@ ] }, { - "name": "faiss_ivf_sq.nlist8192-fp16", + "name": "faiss_gpu_ivf_sq.nlist8192-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist": 8192, "quantizer_type": "fp16"}, - "file": "sift-128-euclidean/faiss_ivf_sq/nlist8192-fp16", + "file": "sift-128-euclidean/faiss_gpu_ivf_sq/nlist8192-fp16", "search_params": [ {"nprobe": 1}, {"nprobe": 5}, @@ -271,10 +271,10 @@ ] }, { - "name": "faiss_ivf_sq.nlist16384-fp16", + "name": "faiss_gpu_ivf_sq.nlist16384-fp16", "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist": 16384, "quantizer_type": "fp16"}, - "file": "sift-128-euclidean/faiss_ivf_sq/nlist16384-fp16", + "file": "sift-128-euclidean/faiss_gpu_ivf_sq/nlist16384-fp16", "search_params": [ {"nprobe": 1}, {"nprobe": 5}, @@ -288,10 +288,10 @@ ] }, { - "name": "faiss_ivf_sq.nlist1024-int8", + "name": "faiss_gpu_ivf_sq.nlist1024-int8", "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist": 1024, "quantizer_type": "int8"}, - "file": "sift-128-euclidean/faiss_ivf_sq/nlist1024-int8", + "file": "sift-128-euclidean/faiss_gpu_ivf_sq/nlist1024-int8", "search_params": [ {"nprobe": 1}, {"nprobe": 5}, @@ -304,10 +304,10 @@ ] }, { - "name": "faiss_ivf_sq.nlist2048-int8", + "name": "faiss_gpu_ivf_sq.nlist2048-int8", "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist": 2048,"quantizer_type": "int8"}, - "file": "sift-128-euclidean/faiss_ivf_sq/nlist2048-int8", + "file": "sift-128-euclidean/faiss_gpu_ivf_sq/nlist2048-int8", "search_params": [ {"nprobe": 1}, {"nprobe": 5}, @@ -320,10 +320,10 @@ ] }, { - "name": "faiss_ivf_sq.nlist4096-int8", + "name": "faiss_gpu_ivf_sq.nlist4096-int8", "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist": 4096, "quantizer_type": "int8"}, - 
"file": "sift-128-euclidean/faiss_ivf_sq/nlist4096-int8", + "file": "sift-128-euclidean/faiss_gpu_ivf_sq/nlist4096-int8", "search_params": [ {"nprobe": 1}, {"nprobe": 5}, @@ -336,10 +336,10 @@ ] }, { - "name": "faiss_ivf_sq.nlist8192-int8", + "name": "faiss_gpu_ivf_sq.nlist8192-int8", "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist": 8192, "quantizer_type": "int8"}, - "file": "sift-128-euclidean/faiss_ivf_sq/nlist8192-int8", + "file": "sift-128-euclidean/faiss_gpu_ivf_sq/nlist8192-int8", "search_params": [ {"nprobe": 1}, {"nprobe": 5}, @@ -352,10 +352,10 @@ ] }, { - "name": "faiss_ivf_sq.nlist16384-int8", + "name": "faiss_gpu_ivf_sq.nlist16384-int8", "algo": "faiss_gpu_ivf_sq", "build_param": {"nlist": 16384, "quantizer_type": "int8"}, - "file": "sift-128-euclidean/faiss_ivf_sq/nlist16384-int8", + "file": "sift-128-euclidean/faiss_gpu_ivf_sq/nlist16384-int8", "search_params": [ {"nprobe": 1}, {"nprobe": 5}, @@ -369,10 +369,10 @@ ] }, { - "name": "faiss_flat", + "name": "faiss_gpu_flat", "algo": "faiss_gpu_flat", "build_param": {}, - "file": "sift-128-euclidean/faiss_flat/flat", + "file": "sift-128-euclidean/faiss_gpu_flat/flat", "search_params": [{}] }, { From 7d213752b43ec0df1c1df72c873c3f099015c5c9 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Mon, 25 Sep 2023 17:47:44 -0400 Subject: [PATCH 22/57] Updating faiss version --- conda/environments/bench_ann_cuda-118_arch-x86_64.yaml | 3 ++- dependencies.yaml | 9 ++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 5a9ef5bd32..6ceeb568ef 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -30,7 +30,8 @@ dependencies: - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 -- libfaiss>=1.7.1 +- libfaiss-avx2=1.7.4 +- libfaiss=1.7.4 - nccl>=2.9.9 - ninja - nlohmann_json>=3.11.2 diff --git a/dependencies.yaml b/dependencies.yaml index 700a6db1bf..9e208aac5f 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -181,9 +181,16 @@ dependencies: - nlohmann_json>=3.11.2 - glog>=0.6.0 - h5py>=3.8.0 - - libfaiss>=1.7.1 + - libfaiss=1.7.4 - benchmark>=1.8.2 - faiss-proc=*=cuda + specific: + - output_types: [conda, pyproject, requirements] + matrices: + - matrix: + arch: x86_64 + packages: + - libfaiss-avx2=1.7.4 nn_bench_python: common: - output_types: [conda] From 001c22453e1cb302118f2856301014e92ea58b21 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Mon, 25 Sep 2023 19:16:32 -0400 Subject: [PATCH 23/57] Pringing raft_faiss_targets --- cpp/bench/ann/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 4c294c6e0b..e307de5b1d 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -247,6 +247,8 @@ if(TARGET faiss::faiss_avx2) set(RAFT_FAISS_TARGETS faiss::faiss_avx2) endif() +message("RAFT_FAISS_TARGETS: ${RAFT_FAISS_TARGETS}") + if(RAFT_ANN_BENCH_USE_FAISS_CPU_FLAT) ConfigureAnnBench( NAME FAISS_CPU_FLAT PATH bench/ann/src/faiss/faiss_cpu_benchmark.cpp LINKS From c430bb80057fe609d3daa18727f4e3e79813e612 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 25 Sep 2023 19:41:41 -0400 Subject: [PATCH 24/57] Using faiss from pytorch --- conda/recipes/raft-ann-bench-cpu/meta.yaml | 6 ++---- conda/recipes/raft-ann-bench/meta.yaml | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/conda/recipes/raft-ann-bench-cpu/meta.yaml b/conda/recipes/raft-ann-bench-cpu/meta.yaml index 769fd0be08..f7a34a4bb3 100644 --- a/conda/recipes/raft-ann-bench-cpu/meta.yaml +++ b/conda/recipes/raft-ann-bench-cpu/meta.yaml @@ -47,8 +47,7 @@ requirements: host: - glog {{ glog_version }} - matplotlib - - libfaiss-avx2 {{ faiss_version }} # [linux64] - - libfaiss {{ faiss_version }} # [aarch64] + - faiss-cpu {{ faiss_version }} - nlohmann_json {{ nlohmann_json_version }} - python - pyyaml @@ -57,8 +56,7 @@ requirements: run: - glog {{ glog_version }} - h5py {{ h5py_version }} - - libfaiss-avx2 {{ faiss_version }} # [linux64] - - libfaiss {{ faiss_version }} # [aarch64] + - faiss-cpu {{ faiss_version }} - matplotlib - python - pyyaml diff --git a/conda/recipes/raft-ann-bench/meta.yaml b/conda/recipes/raft-ann-bench/meta.yaml index 987e2a60d7..a2ab0af643 100644 --- a/conda/recipes/raft-ann-bench/meta.yaml +++ b/conda/recipes/raft-ann-bench/meta.yaml @@ -73,8 +73,7 @@ requirements: # Temporarily ignore 
faiss benchmarks on CUDA 12 because packages do not exist yet {% if cuda_major == "11" %} - faiss-proc=*=cuda - - libfaiss {{ faiss_version }} # [aarch64] - - libfaiss-avx2 {{ faiss_version }} # [linux64] + - libfaiss {{ faiss_version }} {% endif %} - h5py {{ h5py_version }} - benchmark @@ -96,8 +95,7 @@ requirements: # Temporarily ignore faiss benchmarks on CUDA 12 because packages do not exist yet {% if cuda_major == "11" %} - faiss-proc=*=cuda - - libfaiss {{ faiss_version }} # [aarch64] - - libfaiss-avx2 {{ faiss_version }} # [linux64] + - libfaiss {{ faiss_version }} {% endif %} - h5py {{ h5py_version }} - benchmark From 02cb915b9076c7a8eed7a639ae3ee7916b0bd1cb Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Thu, 28 Sep 2023 11:02:02 +0200 Subject: [PATCH 25/57] Use huge_page_resource, memmap input graph --- cpp/bench/ann/CMakeLists.txt | 8 +- cpp/bench/ann/src/common/conf.hpp | 8 +- cpp/bench/ann/src/common/dataset.hpp | 35 ++--- cpp/bench/ann/src/raft/raft_benchmark.cu | 8 ++ cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 57 +++++--- cpp/include/raft/neighbors/cagra_types.hpp | 15 +- .../neighbors/cuda_huge_page_resource.hpp | 136 ++++++++++++++++++ .../detail/cagra/cagra_serialize.cuh | 2 +- 8 files changed, 218 insertions(+), 51 deletions(-) create mode 100644 cpp/include/raft/neighbors/cuda_huge_page_resource.hpp diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index a9505ebc3d..c59f0ed0d3 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -208,14 +208,14 @@ if(RAFT_ANN_BENCH_USE_RAFT_CAGRA) PATH bench/ann/src/raft/raft_benchmark.cu $<$:bench/ann/src/raft/raft_cagra.cu> - $<$:bench/ann/src/raft/orig_cagra.cu> + # $<$:bench/ann/src/raft/orig_cagra.cu> LINKS raft::compiled ) - target_compile_options(RAFT_CAGRA_ANN_BENCH PUBLIC -I/workspace/rapids/knn/cagra/include) - target_link_options(RAFT_CAGRA_ANN_BENCH PUBLIC -L/workspace/rapids/knn/cagra/lib) + # target_compile_options(RAFT_CAGRA_ANN_BENCH 
PUBLIC -I/workspace/rapids/knn/cagra/include) + # target_link_options(RAFT_CAGRA_ANN_BENCH PUBLIC -L/workspace/rapids/knn/cagra/lib) - target_link_libraries(RAFT_CAGRA_ANN_BENCH PUBLIC cagra) + # target_link_libraries(RAFT_CAGRA_ANN_BENCH PUBLIC cagra) endif() if(RAFT_ANN_BENCH_USE_FAISS_IVF_FLAT) diff --git a/cpp/bench/ann/src/common/conf.hpp b/cpp/bench/ann/src/common/conf.hpp index 405b00a74e..b02f1db59f 100644 --- a/cpp/bench/ann/src/common/conf.hpp +++ b/cpp/bench/ann/src/common/conf.hpp @@ -113,9 +113,11 @@ class Configuration { index.name = conf.at("name"); index.algo = conf.at("algo"); index.build_param = conf.at("build_param"); - index.file = conf.at("file"); - index.batch_size = batch_size; - index.k = k; + std::cout << "reading conf file" << std::endl; + index.file = conf.at("file"); + std::cout << "read conf file" << std::endl; + index.batch_size = batch_size; + index.k = k; if (conf.contains("multigpu")) { for (auto it : conf.at("multigpu")) { diff --git a/cpp/bench/ann/src/common/dataset.hpp b/cpp/bench/ann/src/common/dataset.hpp index ccc5915b3c..b794e9ac2d 100644 --- a/cpp/bench/ann/src/common/dataset.hpp +++ b/cpp/bench/ann/src/common/dataset.hpp @@ -46,7 +46,7 @@ namespace raft::bench::ann { // and int8 type data. // As extensions for this benchmark, half and int data files will have suffixes .f16bin // and .ibin, respectively. 
-template +template class BinFile { public: BinFile(const std::string& file, @@ -83,10 +83,10 @@ class BinFile { { assert(!read_mode_); if (!fp_) { open_file_(); } - if (fwrite(&nrows, sizeof(uint32_t), 1, fp_) != 1) { + if (fwrite(&nrows, sizeof(SizeT), 1, fp_) != 1) { throw std::runtime_error("fwrite() BinFile " + file_ + " failed"); } - if (fwrite(&ndims, sizeof(uint32_t), 1, fp_) != 1) { + if (fwrite(&ndims, sizeof(SizeT), 1, fp_) != 1) { throw std::runtime_error("fwrite() BinFile " + file_ + " failed"); } @@ -134,11 +134,11 @@ class BinFile { mutable void* mapped_ptr_{nullptr}; }; -template -BinFile::BinFile(const std::string& file, - const std::string& mode, - uint32_t subset_first_row, - uint32_t subset_size) +template +BinFile::BinFile(const std::string& file, + const std::string& mode, + uint32_t subset_first_row, + uint32_t subset_size) : file_(file), read_mode_(mode == "r"), subset_first_row_(subset_first_row), @@ -161,8 +161,8 @@ BinFile::BinFile(const std::string& file, } } -template -void BinFile::open_file_() const +template +void BinFile::open_file_() const { fp_ = fopen(file_.c_str(), read_mode_ ? 
"r" : "w"); if (!fp_) { throw std::runtime_error("open BinFile failed: " + file_); } @@ -172,15 +172,15 @@ void BinFile::open_file_() const if (stat(file_.c_str(), &statbuf) != 0) { throw std::runtime_error("stat() failed: " + file_); } file_size_ = statbuf.st_size; - uint32_t header[2]; - if (fread(header, sizeof(uint32_t), 2, fp_) != 2) { + SizeT header[2]; + if (fread(header, sizeof(SizeT), 2, fp_) != 2) { throw std::runtime_error("read header of BinFile failed: " + file_); } nrows_ = header[0]; ndims_ = header[1]; size_t expected_file_size = - 2 * sizeof(uint32_t) + static_cast(nrows_) * ndims_ * sizeof(T); + 2 * sizeof(SizeT) + static_cast(nrows_) * ndims_ * sizeof(T); if (file_size_ != expected_file_size) { throw std::runtime_error("expected file size of " + file_ + " is " + std::to_string(expected_file_size) + ", however, actual size is " + @@ -208,8 +208,8 @@ void BinFile::open_file_() const } } -template -void BinFile::check_suffix_() +template +void BinFile::check_suffix_() { auto pos = file_.rfind('.'); if (pos == std::string::npos) { @@ -238,8 +238,9 @@ void BinFile::check_suffix_() throw std::runtime_error("BinFile should has .i8bin suffix: " + file_); } } else { - throw std::runtime_error( - "T of BinFile should be one of float, half, int, uint8_t, or int8_t"); + std::cout << "skipping suffix test" << std::endl; + // throw std::runtime_error( + // "T of BinFile should be one of float, half, int, uint8_t, or int8_t"); } } diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index 4715dbb838..12b539a147 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -45,6 +45,8 @@ extern template class raft::bench::ann::RaftIvfPQ; extern template class raft::bench::ann::RaftCagra; extern template class raft::bench::ann::RaftCagra; extern template class raft::bench::ann::RaftCagra; +#endif +#ifdef RAFT_ANN_BENCH_USE_ORIG_CAGRA #include "orig_cagra_wrapper.h" extern 
template class raft::bench::ann::Cagra; extern template class raft::bench::ann::Cagra; @@ -175,6 +177,8 @@ void parse_search_param(const nlohmann::json& conf, } } } +#endif +#ifdef RAFT_ANN_BENCH_USE_ORIG_CAGRA template void parse_build_param(const nlohmann::json& conf, typename raft::bench::ann::Cagra::BuildParam& param) @@ -258,6 +262,8 @@ std::unique_ptr> create_algo(const std::string& algo, parse_build_param(conf, param); ann = std::make_unique>(metric, dim, param); } +#endif +#ifdef RAFT_ANN_BENCH_USE_ORIG_CAGRA if (algo == "cagra") { typename raft::bench::ann::Cagra::BuildParam param; // parse_build_param(conf, param); @@ -300,6 +306,8 @@ std::unique_ptr::AnnSearchParam> create_search parse_search_param(conf, *param); return param; } +#endif +#ifdef RAFT_ANN_BENCH_USE_ORIG_CAGRA if (algo == "cagra") { auto param = std::make_unique::SearchParam>(); parse_search_param(conf, *param); diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index 5234f23d44..42e1710234 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -40,6 +40,7 @@ #include #include "../common/ann_types.hpp" +#include "../common/dataset.hpp" #include "raft_ann_bench_utils.h" #include @@ -61,8 +62,8 @@ class RaftCagra : public ANN { : ANN(metric, dim), index_params_(param), dimension_(dim), - mr_(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull), - graph_(make_device_matrix(handle_, 0, 0)) + mr_(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull) //, + // graph_(make_device_matrix(handle_, 0, 0)) { rmm::mr::set_current_device_resource(&mr_); index_params_.metric = parse_metric_type(metric); @@ -103,10 +104,12 @@ class RaftCagra : public ANN { raft::device_resources handle_; BuildParam index_params_; raft::neighbors::cagra::search_params search_params_; - raft::device_matrix graph_; + // raft::device_matrix graph_; + raft::host_matrix_view graph_; std::optional> index_; 
int device_; int dimension_; + std::unique_ptr> graph_file_; }; template @@ -139,8 +142,7 @@ template void RaftCagra::set_search_dataset(const T* dataset, size_t nrow) { auto dataset_v = raft::make_host_matrix_view(dataset, nrow, this->dim_); - index_.emplace( - handle_, parse_metric_type(this->metric_), dataset_v, make_const_mdspan(graph_.view())); + index_.emplace(handle_, parse_metric_type(this->metric_), dataset_v, make_const_mdspan(graph_)); // index_->update_dataset(handle_, // raft::make_host_matrix_view(dataset, nrow, @@ -199,27 +201,38 @@ void RaftCagra::load(const std::string& file) // is.close(); // 3. Cagra's knn file format - std::ifstream ifs(file, std::ios::in | std::ios::binary); - if (!ifs) { - throw std::runtime_error("File not exist : " + file + " (`" + __func__ + "` in " + __FILE__ + - ")"); - } + // std::ifstream ifs(file, std::ios::in | std::ios::binary); + // if (!ifs) { + // throw std::runtime_error("File not exist : " + file + " (`" + __func__ + "` in " + __FILE__ + + // ")"); + // } - std::size_t size, degree; + // std::size_t size, degree; - ifs.read(reinterpret_cast(&size), sizeof(size)); - ifs.read(reinterpret_cast(&degree), sizeof(degree)); + // ifs.read(reinterpret_cast(&size), sizeof(size)); + // ifs.read(reinterpret_cast(&degree), sizeof(degree)); - auto graph_h = make_host_matrix(size, degree); - graph_ = make_device_matrix(handle_, size, degree); + // auto graph_h = make_host_matrix(size, degree); + // graph_ = make_device_matrix(handle_, size, degree); - for (std::size_t i = 0; i < size; i++) { - ifs.read(reinterpret_cast(graph_h.data_handle() + i * degree), sizeof(IdxT) * degree); - } - ifs.close(); - raft::copy( - graph_.data_handle(), graph_h.data_handle(), graph_.size(), resource::get_cuda_stream(handle_)); - resource::sync_stream(handle_); + // for (std::size_t i = 0; i < size; i++) { + // ifs.read(reinterpret_cast(graph_h.data_handle() + i * degree), sizeof(IdxT) * degree); + // } + // ifs.close(); + + // raft::copy( + // 
graph_.data_handle(), graph_h.data_handle(), graph_.size(), + // resource::get_cuda_stream(handle_)); + // resource::sync_stream(handle_); + + graph_file_ = std::make_unique>(file, "r"); + size_t n_rows; + int ndims; + graph_file_->get_shape(&n_rows, &ndims); + IdxT* ptr = graph_file_->map(); + graph_ = make_host_matrix_view(ptr, (int64_t)n_rows, (int64_t)ndims); + // aind_v = make_device_matrix_view( + // indices.data_handle(), params_.n_queries, params_.k); } template diff --git a/cpp/include/raft/neighbors/cagra_types.hpp b/cpp/include/raft/neighbors/cagra_types.hpp index 3ea3fd57a1..6576ad0f7a 100644 --- a/cpp/include/raft/neighbors/cagra_types.hpp +++ b/cpp/include/raft/neighbors/cagra_types.hpp @@ -33,7 +33,9 @@ #include #include +#include "cuda_huge_page_resource.hpp" #include "cuda_pinned_resource.hpp" + #include #include @@ -172,6 +174,7 @@ struct index : ann::index { index(raft::resources const& res) : ann::index(), mr_(new rmm::mr::cuda_pinned_resource()), + mr_huge_(new rmm::mr::cuda_huge_page_resource()), metric_(raft::distance::DistanceType::L2Expanded), dataset_(make_device_matrix(res, 0, 0)), dataset_pinned_(0, resource::get_cuda_stream(res), mr_.get()), @@ -239,15 +242,18 @@ struct index : ann::index { raft::distance::DistanceType metric, mdspan, row_major, data_accessor> dataset, mdspan, row_major, graph_accessor> knn_graph, - bool graph_pinned = false, - bool data_pinned = false) + bool graph_pinned = true, + bool data_pinned = true) : ann::index(), mr_(new rmm::mr::cuda_pinned_resource()), + mr_huge_(new rmm::mr::cuda_huge_page_resource()), metric_(metric), dataset_(make_device_matrix(res, 0, 0)), - dataset_pinned_(0, resource::get_cuda_stream(res), mr_.get()), + dataset_pinned_(0, resource::get_cuda_stream(res), mr_huge_.get()), + // dataset_pinned_(0, resource::get_cuda_stream(res), mr_.get()), graph_(make_device_matrix(res, 0, 0)), - graph_pinned_(0, resource::get_cuda_stream(res), mr_.get()) + graph_pinned_(0, resource::get_cuda_stream(res), 
mr_huge_.get()) + // graph_pinned_(0, resource::get_cuda_stream(res), mr_.get()) { RAFT_EXPECTS(dataset.extent(0) == knn_graph.extent(0), "Dataset and knn_graph must have equal number of rows"); @@ -392,6 +398,7 @@ struct index : ann::index { private: std::unique_ptr mr_; + std::unique_ptr mr_huge_; raft::distance::DistanceType metric_; raft::device_matrix dataset_; rmm::device_uvector dataset_pinned_; diff --git a/cpp/include/raft/neighbors/cuda_huge_page_resource.hpp b/cpp/include/raft/neighbors/cuda_huge_page_resource.hpp new file mode 100644 index 0000000000..c0eb6378cc --- /dev/null +++ b/cpp/include/raft/neighbors/cuda_huge_page_resource.hpp @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +// #include + +#include +#include +#include +#include + +#include + +namespace rmm::mr { +/** + * @brief `device_memory_resource` derived class that uses `mmap`/`munmap` with + * `madvise(MADV_HUGEPAGE)` for allocation/deallocation. 
+ */ +class cuda_huge_page_resource final : public device_memory_resource { + public: + cuda_huge_page_resource() = default; + ~cuda_huge_page_resource() override = default; + cuda_huge_page_resource(cuda_huge_page_resource const&) = default; + cuda_huge_page_resource(cuda_huge_page_resource&&) = default; + cuda_huge_page_resource& operator=(cuda_huge_page_resource const&) = default; + cuda_huge_page_resource& operator=(cuda_huge_page_resource&&) = default; + + /** + * @brief Query whether the resource supports use of non-null CUDA streams for + * allocation/deallocation. `cuda_huge_page_resource` does not support streams. + * + * @returns bool false + */ + [[nodiscard]] bool supports_streams() const noexcept override { return false; } + + /** + * @brief Query whether the resource supports the get_mem_info API. + * + * @return true + */ + [[nodiscard]] bool supports_get_mem_info() const noexcept override { return true; } + + private: + /** + * @brief Allocates zero-filled memory of size `bytes` using an anonymous `mmap` mapping advised with `MADV_HUGEPAGE`. + * + * The returned pointer has at least 256B alignment. + * + * @note Stream argument is ignored + * + * @note Does not throw: if `mmap` or `madvise` fails, the process is terminated via `exit(-1)` + * + * @param bytes The size, in bytes, of the allocation + * @return void* Pointer to the newly allocated memory + */ + void* do_allocate(std::size_t bytes, cuda_stream_view) override + { + void* _addr{nullptr}; + _addr = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (_addr == MAP_FAILED) { + // RAFT_LOG_ERROR("mmap failed"); + + exit(-1); + } + if (madvise(_addr, bytes, MADV_HUGEPAGE) == -1) { + // RAFT_LOG_ERROR("madvise"); + munmap(_addr, bytes); + exit(-1); + } + memset(_addr, 0, bytes); + return _addr; + } + + /** + * @brief Deallocate memory pointed to by \p ptr. + * + * @note Stream argument is ignored. + * + * @throws Nothing. 
+ * + * @param ptr Pointer to be deallocated + */ + void do_deallocate(void* ptr, std::size_t size, cuda_stream_view) override + { + if (munmap(ptr, size) == -1) { + // RAFT_LOG_ERROR("munmap"); + exit(-1); + } + } + + /** + * @brief Compare this resource to another. + * + * Two cuda_huge_page_resources always compare equal, because they can each + * deallocate memory allocated by the other. + * + * @throws Nothing. + * + * @param other The other resource to compare to + * @return true If the two resources are equivalent + * @return false If the two resources are not equal + */ + [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override + { + return dynamic_cast(&other) != nullptr; + } + + /** + * @brief Get free and available memory for memory resource + * + * @throws `rmm::cuda_error` if unable to retrieve memory info. + * + * @return std::pair containing free_size and total_size of memory + */ + [[nodiscard]] std::pair do_get_mem_info(cuda_stream_view) const override + { + std::size_t free_size{}; + std::size_t total_size{}; + RMM_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size)); + return std::make_pair(free_size, total_size); + } +}; +} // namespace rmm::mr \ No newline at end of file diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh index 0962d535ea..e1d46f2237 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh @@ -36,7 +36,7 @@ struct check_index_layout { "paste in the new size and consider updating the serialization logic"); }; -constexpr size_t expected_size = 288; +constexpr size_t expected_size = 296; template struct check_index_layout), expected_size>; /** From 30428fd9ae870d48b318cb6a1220c7c8619c0724 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Thu, 28 Sep 2023 12:58:14 -0400 Subject: [PATCH 26/57] Building faiss statically each time. 
Will slow down CI but alleviate some of the conda issues. --- .../bench_ann_cuda-118_arch-x86_64.yaml | 3 +-- conda/recipes/raft-ann-bench-cpu/meta.yaml | 4 ++-- conda/recipes/raft-ann-bench/meta.yaml | 16 ++++++++-------- cpp/bench/ann/CMakeLists.txt | 1 + cpp/cmake/thirdparty/get_faiss.cmake | 4 ++-- dependencies.yaml | 3 +-- 6 files changed, 15 insertions(+), 16 deletions(-) diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 6ceeb568ef..8652c6f076 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -17,7 +17,6 @@ dependencies: - cudatoolkit - cxx-compiler - cython>=3.0.0 -- faiss-proc=*=cuda - gcc_linux-64=11.* - glog>=0.6.0 - h5py>=3.8.0 @@ -31,10 +30,10 @@ dependencies: - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 - libfaiss-avx2=1.7.4 -- libfaiss=1.7.4 - nccl>=2.9.9 - ninja - nlohmann_json>=3.11.2 +- rmm=23.10.* - scikit-build>=0.13.1 - sysroot_linux-64==2.17 name: bench_ann_cuda-118_arch-x86_64 diff --git a/conda/recipes/raft-ann-bench-cpu/meta.yaml b/conda/recipes/raft-ann-bench-cpu/meta.yaml index f7a34a4bb3..7df2c60f7e 100644 --- a/conda/recipes/raft-ann-bench-cpu/meta.yaml +++ b/conda/recipes/raft-ann-bench-cpu/meta.yaml @@ -47,7 +47,7 @@ requirements: host: - glog {{ glog_version }} - matplotlib - - faiss-cpu {{ faiss_version }} +# - faiss-cpu {{ faiss_version }} - nlohmann_json {{ nlohmann_json_version }} - python - pyyaml @@ -56,7 +56,7 @@ requirements: run: - glog {{ glog_version }} - h5py {{ h5py_version }} - - faiss-cpu {{ faiss_version }} +# - faiss-cpu {{ faiss_version }} - matplotlib - python - pyyaml diff --git a/conda/recipes/raft-ann-bench/meta.yaml b/conda/recipes/raft-ann-bench/meta.yaml index a2ab0af643..2270460df6 100644 --- a/conda/recipes/raft-ann-bench/meta.yaml +++ b/conda/recipes/raft-ann-bench/meta.yaml @@ -71,10 +71,10 @@ requirements: - glog {{ glog_version }} - 
nlohmann_json {{ nlohmann_json_version }} # Temporarily ignore faiss benchmarks on CUDA 12 because packages do not exist yet - {% if cuda_major == "11" %} - - faiss-proc=*=cuda - - libfaiss {{ faiss_version }} - {% endif %} +# {% if cuda_major == "11" %} +# - faiss-proc=*=cuda +# - libfaiss {{ faiss_version }} +# {% endif %} - h5py {{ h5py_version }} - benchmark - matplotlib @@ -93,10 +93,10 @@ requirements: {% endif %} - glog {{ glog_version }} # Temporarily ignore faiss benchmarks on CUDA 12 because packages do not exist yet - {% if cuda_major == "11" %} - - faiss-proc=*=cuda - - libfaiss {{ faiss_version }} - {% endif %} +# {% if cuda_major == "11" %} +# - faiss-proc=*=cuda +# - libfaiss {{ faiss_version }} +# {% endif %} - h5py {{ h5py_version }} - benchmark - glog {{ glog_version }} diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index e307de5b1d..f95cf67e5c 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -77,6 +77,7 @@ if(RAFT_ANN_BENCH_USE_FAISS_GPU_FLAT OR RAFT_ANN_BENCH_USE_FAISS_CPU_IFFLAT ) set(RAFT_ANN_BENCH_USE_FAISS ON) + set(RAFT_USE_FAISS_STATIC ON) endif() set(RAFT_ANN_BENCH_USE_RAFT OFF) diff --git a/cpp/cmake/thirdparty/get_faiss.cmake b/cpp/cmake/thirdparty/get_faiss.cmake index bb7dc1eb5c..b2d0cf20cf 100644 --- a/cpp/cmake/thirdparty/get_faiss.cmake +++ b/cpp/cmake/thirdparty/get_faiss.cmake @@ -90,8 +90,8 @@ function(find_and_configure_faiss) endfunction() find_and_configure_faiss(VERSION 1.7.4 - REPOSITORY ${RAFT_FAISS_GIT_REPOSITORY} - PINNED_TAG ${RAFT_FAISS_GIT_TAG} + REPOSITORY git@github.com:facebookresearch/faiss.git + PINNED_TAG v1.7.4 BUILD_STATIC_LIBS ${RAFT_USE_FAISS_STATIC} EXCLUDE_FROM_ALL ${RAFT_EXCLUDE_FAISS_FROM_ALL} ENABLE_GPU ${RAFT_FAISS_ENABLE_GPU}) diff --git a/dependencies.yaml b/dependencies.yaml index 9e208aac5f..1baf741a60 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -181,9 +181,8 @@ dependencies: - nlohmann_json>=3.11.2 - glog>=0.6.0 - h5py>=3.8.0 - 
- libfaiss=1.7.4 - benchmark>=1.8.2 - - faiss-proc=*=cuda + - rmm=23.10.* specific: - output_types: [conda, pyproject, requirements] matrices: From db2d21078d6a5b1d16ffcb326be09056533a8836 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Thu, 28 Sep 2023 13:03:23 -0400 Subject: [PATCH 27/57] Updates --- conda/environments/bench_ann_cuda-118_arch-x86_64.yaml | 2 +- dependencies.yaml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 13d15156f2..c6e298dcd2 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -33,8 +33,8 @@ dependencies: - nccl>=2.9.9 - ninja - nlohmann_json>=3.11.2 -- rmm=23.10.* - nvcc_linux-64=11.8 +- rmm=23.10.* - scikit-build>=0.13.1 - sysroot_linux-64==2.17 name: bench_ann_cuda-118_arch-x86_64 diff --git a/dependencies.yaml b/dependencies.yaml index eba8e27196..4b90302400 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -178,6 +178,7 @@ dependencies: packages: - &rmm_conda rmm==23.10.* - output_types: requirements + packages: # pip recognizes the index as a global option for the requirements.txt file # This index is needed for rmm-cu{11,12}. From cb2eef89dfc39730b1752199fddd7aaef75dac94 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Thu, 28 Sep 2023 13:03:37 -0400 Subject: [PATCH 28/57] Reverting --- dependencies.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/dependencies.yaml b/dependencies.yaml index 4b90302400..eba8e27196 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -178,7 +178,6 @@ dependencies: packages: - &rmm_conda rmm==23.10.* - output_types: requirements - packages: # pip recognizes the index as a global option for the requirements.txt file # This index is needed for rmm-cu{11,12}. From 375c38e6bb0741fce0f97a942234c804d4f330f2 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Thu, 28 Sep 2023 15:23:06 -0400 Subject: [PATCH 29/57] Using https for faiss github repo --- cpp/cmake/thirdparty/get_faiss.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake/thirdparty/get_faiss.cmake b/cpp/cmake/thirdparty/get_faiss.cmake index b2d0cf20cf..d980a4497b 100644 --- a/cpp/cmake/thirdparty/get_faiss.cmake +++ b/cpp/cmake/thirdparty/get_faiss.cmake @@ -90,7 +90,7 @@ function(find_and_configure_faiss) endfunction() find_and_configure_faiss(VERSION 1.7.4 - REPOSITORY git@github.com:facebookresearch/faiss.git + REPOSITORY https://github.com/facebookresearch/faiss.git PINNED_TAG v1.7.4 BUILD_STATIC_LIBS ${RAFT_USE_FAISS_STATIC} EXCLUDE_FROM_ALL ${RAFT_EXCLUDE_FAISS_FROM_ALL} From c4fb53ce6adb732a2d1ae40606fdbf9d85e774c8 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Mon, 2 Oct 2023 18:12:04 -0400 Subject: [PATCH 30/57] Trying again --- cpp/cmake/thirdparty/get_faiss.cmake | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/cmake/thirdparty/get_faiss.cmake b/cpp/cmake/thirdparty/get_faiss.cmake index d980a4497b..dde66c7a64 100644 --- a/cpp/cmake/thirdparty/get_faiss.cmake +++ b/cpp/cmake/thirdparty/get_faiss.cmake @@ -42,8 +42,6 @@ function(find_and_configure_faiss) list(APPEND RAFT_FAISS_EXPORT_GLOBAL_TARGETS faiss_avx2) endif() - - rapids_cpm_find(faiss ${PKG_VERSION} GLOBAL_TARGETS ${RAFT_FAISS_GLOBAL_TARGETS} CPM_ARGS @@ -57,7 +55,7 @@ function(find_and_configure_faiss) "FAISS_USE_CUDA_TOOLKIT_STATIC ${CUDA_STATIC_RUNTIME}" "BUILD_TESTING OFF" "CMAKE_MESSAGE_LOG_LEVEL VERBOSE" - "CUDAToolkit_ROOT ${CUDAToolkit_LIBRARY_DIR}" +# "CUDAToolkit_ROOT ${CUDAToolkit_LIBRARY_DIR}" ) if(TARGET faiss AND NOT TARGET faiss::faiss) From 8bb273c53e19b9a467e351350c43bd2b44b17f62 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Mon, 2 Oct 2023 18:21:17 -0400 Subject: [PATCH 31/57] Using corey's fork for now --- cpp/cmake/thirdparty/get_faiss.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/cmake/thirdparty/get_faiss.cmake b/cpp/cmake/thirdparty/get_faiss.cmake index dde66c7a64..c758b6cc01 100644 --- a/cpp/cmake/thirdparty/get_faiss.cmake +++ b/cpp/cmake/thirdparty/get_faiss.cmake @@ -55,7 +55,7 @@ function(find_and_configure_faiss) "FAISS_USE_CUDA_TOOLKIT_STATIC ${CUDA_STATIC_RUNTIME}" "BUILD_TESTING OFF" "CMAKE_MESSAGE_LOG_LEVEL VERBOSE" -# "CUDAToolkit_ROOT ${CUDAToolkit_LIBRARY_DIR}" + "CUDAToolkit_ROOT ${CUDAToolkit_LIBRARY_DIR}" ) if(TARGET faiss AND NOT TARGET faiss::faiss) @@ -88,8 +88,8 @@ function(find_and_configure_faiss) endfunction() find_and_configure_faiss(VERSION 1.7.4 - REPOSITORY https://github.com/facebookresearch/faiss.git - PINNED_TAG v1.7.4 + REPOSITORY https://github.com/cjnolet/faiss.git + PINNED_TAG fea/statically-link-ctk BUILD_STATIC_LIBS ${RAFT_USE_FAISS_STATIC} EXCLUDE_FROM_ALL ${RAFT_EXCLUDE_FAISS_FROM_ALL} ENABLE_GPU ${RAFT_FAISS_ENABLE_GPU}) From d53931636dea919eaa110813317c57a5d557c1bf Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Mon, 2 Oct 2023 19:46:29 -0400 Subject: [PATCH 32/57] More updates --- cpp/cmake/thirdparty/get_faiss.cmake | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/cpp/cmake/thirdparty/get_faiss.cmake b/cpp/cmake/thirdparty/get_faiss.cmake index c758b6cc01..4ec95f67c4 100644 --- a/cpp/cmake/thirdparty/get_faiss.cmake +++ b/cpp/cmake/thirdparty/get_faiss.cmake @@ -87,9 +87,23 @@ function(find_and_configure_faiss) rapids_export_find_package_root(BUILD faiss [=[${CMAKE_CURRENT_LIST_DIR}]=] raft-ann-bench-exports) endfunction() +if(NOT RAFT_FAISS_GIT_TAG) + # TODO: Remove this once faiss supports FAISS_USE_CUDA_TOOLKIT_STATIC + # (https://github.com/facebookresearch/faiss/pull/2446) + set(RAFT_FAISS_GIT_TAG fea/statically-link-ctk) + # set(RAFT_FAISS_GIT_TAG bde7c0027191f29c9dadafe4f6e68ca0ee31fb30) +endif() + +if(NOT RAFT_FAISS_GIT_REPOSITORY) + # TODO: Remove this once faiss supports FAISS_USE_CUDA_TOOLKIT_STATIC + # (https://github.com/facebookresearch/faiss/pull/2446) + set(RAFT_FAISS_GIT_REPOSITORY https://github.com/cjnolet/faiss.git) + # set(RAFT_FAISS_GIT_REPOSITORY https://github.com/facebookresearch/faiss.git) +endif() + find_and_configure_faiss(VERSION 1.7.4 - REPOSITORY https://github.com/cjnolet/faiss.git - PINNED_TAG fea/statically-link-ctk + REPOSITORY ${RAFT_FAISS_GIT_REPOSITORY} + PINNED_TAG ${RAFT_FAISS_GIT_TAG} BUILD_STATIC_LIBS ${RAFT_USE_FAISS_STATIC} EXCLUDE_FROM_ALL ${RAFT_EXCLUDE_FAISS_FROM_ALL} ENABLE_GPU ${RAFT_FAISS_ENABLE_GPU}) From fce179b56698b1edf65294acd50a1a489b54e69d Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Mon, 2 Oct 2023 21:35:13 -0400 Subject: [PATCH 33/57] CHecking cudatoolkit library dir --- cpp/bench/ann/CMakeLists.txt | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index f95cf67e5c..6d24835dec 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -249,7 +249,8 @@ if(TARGET faiss::faiss_avx2) endif() message("RAFT_FAISS_TARGETS: ${RAFT_FAISS_TARGETS}") - +message( + "CUDAToolkit_LIBRARY_DIR: ${CUDAToolkit_LIBRARY_DIR}) if(RAFT_ANN_BENCH_USE_FAISS_CPU_FLAT) ConfigureAnnBench( NAME FAISS_CPU_FLAT PATH bench/ann/src/faiss/faiss_cpu_benchmark.cpp LINKS @@ -339,16 +340,17 @@ if(RAFT_ANN_BENCH_SINGLE_EXE) target_compile_definitions( ANN_BENCH PRIVATE - $<$:ANN_BENCH_LINK_CUDART="libcudart.so.${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}.${CUDAToolkit_VERSION_PATCH}"> - $<$:ANN_BENCH_NVTX3_HEADERS_FOUND> - ) + $<$:ANN_BENCH_LINK_CUDART="libcudart.so.${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}.${CUDAToolkit_VERSION_PATCH} + "> + $<$:ANN_BENCH_NVTX3_HEADERS_FOUND> +) - target_link_options(ANN_BENCH PRIVATE -export-dynamic) +target_link_options(ANN_BENCH PRIVATE -export-dynamic) - install( - TARGETS ANN_BENCH - COMPONENT ann_bench - DESTINATION bin/ann - EXCLUDE_FROM_ALL - ) +install( + TARGETS ANN_BENCH + COMPONENT ann_bench + DESTINATION bin/ann + EXCLUDE_FROM_ALL +) endif() From f54a75747b387e74c0b67aedf866a0a2d352e5da Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Mon, 2 Oct 2023 23:20:16 -0400 Subject: [PATCH 34/57] iTerminating string --- cpp/bench/ann/CMakeLists.txt | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index 6d24835dec..f85dc1deb7 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -249,8 +249,7 @@ if(TARGET faiss::faiss_avx2) endif() message("RAFT_FAISS_TARGETS: ${RAFT_FAISS_TARGETS}") -message( - "CUDAToolkit_LIBRARY_DIR: ${CUDAToolkit_LIBRARY_DIR}) +message("CUDAToolkit_LIBRARY_DIR: ${CUDAToolkit_LIBRARY_DIR}") if(RAFT_ANN_BENCH_USE_FAISS_CPU_FLAT) ConfigureAnnBench( NAME FAISS_CPU_FLAT PATH bench/ann/src/faiss/faiss_cpu_benchmark.cpp LINKS @@ -342,15 +341,15 @@ if(RAFT_ANN_BENCH_SINGLE_EXE) PRIVATE $<$:ANN_BENCH_LINK_CUDART="libcudart.so.${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}.${CUDAToolkit_VERSION_PATCH} "> - $<$:ANN_BENCH_NVTX3_HEADERS_FOUND> -) + $<$:ANN_BENCH_NVTX3_HEADERS_FOUND> + ) -target_link_options(ANN_BENCH PRIVATE -export-dynamic) + target_link_options(ANN_BENCH PRIVATE -export-dynamic) -install( - TARGETS ANN_BENCH - COMPONENT ann_bench - DESTINATION bin/ann - EXCLUDE_FROM_ALL -) + install( + TARGETS ANN_BENCH + COMPONENT ann_bench + DESTINATION bin/ann + EXCLUDE_FROM_ALL + ) endif() From 385b4f4de7ede5b658d1acb082d6b87a2bb056df Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Wed, 4 Oct 2023 13:49:48 -0400 Subject: [PATCH 35/57] Teach faiss about conda [hacky] --- cpp/bench/ann/CMakeLists.txt | 6 ++++++ cpp/cmake/thirdparty/get_faiss.cmake | 1 - 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index f85dc1deb7..771d492ffb 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -103,6 +103,12 @@ if(RAFT_ANN_BENCH_USE_GGNN) endif() if(RAFT_ANN_BENCH_USE_FAISS) + # We need to ensure that faiss has all the conda + # information. 
So we currently use the very ugly + # hammer of `link_libraries` to ensure that all + # targets in this directory and the faiss directory + # will have the conda includes/link dirs + link_libraries($) include(cmake/thirdparty/get_faiss.cmake) endif() diff --git a/cpp/cmake/thirdparty/get_faiss.cmake b/cpp/cmake/thirdparty/get_faiss.cmake index 4ec95f67c4..9d4840f524 100644 --- a/cpp/cmake/thirdparty/get_faiss.cmake +++ b/cpp/cmake/thirdparty/get_faiss.cmake @@ -55,7 +55,6 @@ function(find_and_configure_faiss) "FAISS_USE_CUDA_TOOLKIT_STATIC ${CUDA_STATIC_RUNTIME}" "BUILD_TESTING OFF" "CMAKE_MESSAGE_LOG_LEVEL VERBOSE" - "CUDAToolkit_ROOT ${CUDAToolkit_LIBRARY_DIR}" ) if(TARGET faiss AND NOT TARGET faiss::faiss) From 95c12db3f7f7eb4c192a80556d3bd234ddaad15f Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 4 Oct 2023 16:34:44 -0400 Subject: [PATCH 36/57] Adding thread pool to overlap faiss queries --- cpp/bench/ann/src/common/thread_pool.hpp | 131 ++++++++++++++++++ .../ann/src/faiss/faiss_cpu_benchmark.cpp | 1 + cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h | 28 +++- cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h | 110 +-------------- cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 2 +- docs/source/ann_benchmarks_param_tuning.md | 13 +- 6 files changed, 171 insertions(+), 114 deletions(-) create mode 100644 cpp/bench/ann/src/common/thread_pool.hpp diff --git a/cpp/bench/ann/src/common/thread_pool.hpp b/cpp/bench/ann/src/common/thread_pool.hpp new file mode 100644 index 0000000000..efea938d5b --- /dev/null +++ b/cpp/bench/ann/src/common/thread_pool.hpp @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +class FixedThreadPool { + public: + FixedThreadPool(int num_threads) + { + if (num_threads < 1) { + throw std::runtime_error("num_threads must >= 1"); + } else if (num_threads == 1) { + return; + } + + tasks_ = new Task_[num_threads]; + + threads_.reserve(num_threads); + for (int i = 0; i < num_threads; ++i) { + threads_.emplace_back([&, i] { + auto& task = tasks_[i]; + while (true) { + std::unique_lock lock(task.mtx); + task.cv.wait(lock, + [&] { return task.has_task || finished_.load(std::memory_order_relaxed); }); + if (finished_.load(std::memory_order_relaxed)) { break; } + + task.task(); + task.has_task = false; + } + }); + } + } + + ~FixedThreadPool() + { + if (threads_.empty()) { return; } + + finished_.store(true, std::memory_order_relaxed); + for (unsigned i = 0; i < threads_.size(); ++i) { + auto& task = tasks_[i]; + std::lock_guard(task.mtx); + + task.cv.notify_one(); + threads_[i].join(); + } + + delete[] tasks_; + } + + template + void submit(Func f, IdxT len) + { + if (threads_.empty()) { + for (IdxT i = 0; i < len; ++i) { + f(i); + } + return; + } + + const int num_threads = threads_.size(); + // one extra part for competition among threads + const IdxT items_per_thread = len / (num_threads + 1); + std::atomic cnt(items_per_thread * num_threads); + + auto wrapped_f = [&](IdxT start, IdxT end) { + for (IdxT i = start; i < end; ++i) { + f(i); + } + + while (true) { + IdxT i = cnt.fetch_add(1, std::memory_order_relaxed); + if (i >= 
len) { break; } + f(i); + } + }; + + std::vector> futures; + futures.reserve(num_threads); + for (int i = 0; i < num_threads; ++i) { + IdxT start = i * items_per_thread; + auto& task = tasks_[i]; + { + std::lock_guard lock(task.mtx); + (void)lock; // stop nvcc warning + task.task = std::packaged_task([=] { wrapped_f(start, start + items_per_thread); }); + futures.push_back(task.task.get_future()); + task.has_task = true; + } + task.cv.notify_one(); + } + + for (auto& fut : futures) { + fut.wait(); + } + return; + } + + private: + struct alignas(64) Task_ { + std::mutex mtx; + std::condition_variable cv; + bool has_task = false; + std::packaged_task task; + }; + + Task_* tasks_; + std::vector threads_; + std::atomic finished_{false}; +}; diff --git a/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp b/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp index 0552e8fa36..f11df605ee 100644 --- a/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp +++ b/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp @@ -76,6 +76,7 @@ void parse_search_param(const nlohmann::json& conf, { param.nprobe = conf.at("nprobe"); if (conf.contains("refine_ratio")) { param.refine_ratio = conf.at("refine_ratio"); } + if (conf.contains("numThreads")) { param.num_threads = conf.at("numThreads"); } } template class Algo> diff --git a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h index 3a78ca1724..a703fa9950 100644 --- a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h +++ b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h @@ -16,6 +16,8 @@ #pragma once #include "../common/ann_types.hpp" +#include "../common/thread_pool.hpp" + #include #include @@ -54,6 +56,7 @@ class FaissCpu : public ANN { struct SearchParam : public AnnSearchParam { int nprobe; float refine_ratio = 1.0; + int num_threads = omp_get_num_procs(); }; struct BuildParam { @@ -116,6 +119,9 @@ class FaissCpu : public ANN { faiss::MetricType metric_type_; int nlist_; double training_sample_fraction_; + + int 
num_threads_; + std::unique_ptr thread_pool_; }; template @@ -160,6 +166,11 @@ void FaissCpu::set_search_param(const AnnSearchParam& param) this->index_refine_ = std::make_unique(this->index_.get()); this->index_refine_.get()->k_factor = search_param.refine_ratio; } + + if (!thread_pool_ || num_threads_ != search_param.num_threads) { + num_threads_ = search_param.num_threads; + thread_pool_ = std::make_unique(num_threads_); + } } template @@ -172,7 +183,13 @@ void FaissCpu::search(const T* queries, { static_assert(sizeof(size_t) == sizeof(faiss::idx_t), "sizes of size_t and faiss::idx_t are different"); - index_->search(batch_size, queries, k, distances, reinterpret_cast(neighbors)); + + thread_pool_->submit( + [&](int i) { + // Use thread pool for batch size = 1. FAISS multi-threads internally for batch size > 1. + index_->search(batch_size, queries, k, distances, reinterpret_cast(neighbors)); + }, + 1); } template @@ -275,7 +292,14 @@ class FaissCpuFlat : public FaissCpu { } // class FaissCpu is more like a IVF class, so need special treating here - void set_search_param(const typename ANN::AnnSearchParam&) override{}; + void set_search_param(const typename ANN::AnnSearchParam& param) override + { + auto search_param = dynamic_cast::SearchParam&>(param); + if (!this->thread_pool_ || this->num_threads_ != search_param.num_threads) { + this->num_threads_ = search_param.num_threads; + this->thread_pool_ = std::make_unique(this->num_threads_); + } + }; void save(const std::string& file) const override { diff --git a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h index 4d7b993aa1..df44605493 100644 --- a/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h +++ b/cpp/bench/ann/src/hnswlib/hnswlib_wrapper.h @@ -31,9 +31,8 @@ #include #include -#include - #include "../common/ann_types.hpp" +#include "../common/thread_pool.hpp" #include namespace raft::bench::ann { @@ -53,112 +52,6 @@ struct hnsw_dist_t { using type = int; }; -class 
FixedThreadPool { - public: - FixedThreadPool(int num_threads) - { - if (num_threads < 1) { - throw std::runtime_error("num_threads must >= 1"); - } else if (num_threads == 1) { - return; - } - - tasks_ = new Task_[num_threads]; - - threads_.reserve(num_threads); - for (int i = 0; i < num_threads; ++i) { - threads_.emplace_back([&, i] { - auto& task = tasks_[i]; - while (true) { - std::unique_lock lock(task.mtx); - task.cv.wait(lock, - [&] { return task.has_task || finished_.load(std::memory_order_relaxed); }); - if (finished_.load(std::memory_order_relaxed)) { break; } - - task.task(); - task.has_task = false; - } - }); - } - } - - ~FixedThreadPool() - { - if (threads_.empty()) { return; } - - finished_.store(true, std::memory_order_relaxed); - for (unsigned i = 0; i < threads_.size(); ++i) { - auto& task = tasks_[i]; - std::lock_guard(task.mtx); - - task.cv.notify_one(); - threads_[i].join(); - } - - delete[] tasks_; - } - - template - void submit(Func f, IdxT len) - { - if (threads_.empty()) { - for (IdxT i = 0; i < len; ++i) { - f(i); - } - return; - } - - const int num_threads = threads_.size(); - // one extra part for competition among threads - const IdxT items_per_thread = len / (num_threads + 1); - std::atomic cnt(items_per_thread * num_threads); - - auto wrapped_f = [&](IdxT start, IdxT end) { - for (IdxT i = start; i < end; ++i) { - f(i); - } - - while (true) { - IdxT i = cnt.fetch_add(1, std::memory_order_relaxed); - if (i >= len) { break; } - f(i); - } - }; - - std::vector> futures; - futures.reserve(num_threads); - for (int i = 0; i < num_threads; ++i) { - IdxT start = i * items_per_thread; - auto& task = tasks_[i]; - { - std::lock_guard lock(task.mtx); - (void)lock; // stop nvcc warning - task.task = std::packaged_task([=] { wrapped_f(start, start + items_per_thread); }); - futures.push_back(task.task.get_future()); - task.has_task = true; - } - task.cv.notify_one(); - } - - for (auto& fut : futures) { - fut.wait(); - } - return; - } - - private: - 
struct alignas(64) Task_ { - std::mutex mtx; - std::condition_variable cv; - bool has_task = false; - std::packaged_task task; - }; - - Task_* tasks_; - std::vector threads_; - std::atomic finished_{false}; -}; - template class HnswLib : public ANN { public: @@ -281,6 +174,7 @@ void HnswLib::search( { thread_pool_->submit( [&](int i) { + // hnsw can only handle a single vector at a time. get_search_knn_results_(query + i * dim_, k, indices + i * k, distances + i * k); }, batch_size); diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index 727a6ed830..19c5151186 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -52,7 +52,7 @@ class RaftCagra : public ANN { using BuildParam = raft::neighbors::cagra::index_params; - RaftCagra(Metric metric, int dim, const BuildParam& param) + RaftCagra(Metric metric, int dim, const BuildParam& param, int concurrent_searches = 1) : ANN(metric, dim), index_params_(param), dimension_(dim), diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md index b70d1d788f..0faaeba59c 100644 --- a/docs/source/ann_benchmarks_param_tuning.md +++ b/docs/source/ann_benchmarks_param_tuning.md @@ -93,11 +93,16 @@ IVF-pq is an inverted-file index, which partitions the vectors into a series of | `numProbes` | `search_params` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | | `refine_ratio` | `search_params` | N| Positive Number >=0 | 0 | `refine_ratio * k` nearest neighbors are queried from the index initially and an additional refinement step improves recall by selecting only the best `k` neighbors. | -### `faiss_flat` +### `faiss_cpu_flat` Use FAISS flat index on the CPU, which performs an exact search using brute-force and doesn't have any further build or search parameters. 
-### `faiss_ivf_flat` + +| Parameter | Type | Required | Data Type | Default | Description | +|-----------|----------------|----------|---------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `numThreads` | `search_params` | N | Positive Integer >0 | 1 | Number of threads to use for queries. | + +### `faiss_cpu_ivf_flat` Use FAISS IVF-Flat index on CPU @@ -106,8 +111,9 @@ Use FAISS IVF-Flat index on CPU | `nlists` | `build_param` | Y | Positive Integer >0 | | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. | | `ratio` | `build_param` | N | Positive Integer >0 | 2 | `1/ratio` is the number of training points which should be used to train the clusters. | | `nprobe` | `search_params` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | +| `numThreads` | `search_params` | N | Positive Integer >0 | 1 | Number of threads to use for queries. | -### `faiss_ivf_pq` +### `faiss_cpu_ivf_pq` Use FAISS IVF-PQ index on CPU @@ -120,6 +126,7 @@ Use FAISS IVF-PQ index on CPU | `bitsPerCode` | `build_param` | N | Positive Integer [4-8] | 8 | Number of bits to use for each code. | | `numProbes` | `search_params` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. | | `refine_ratio` | `search_params` | N| Positive Number >=0 | 0 | `refine_ratio * k` nearest neighbors are queried from the index initially and an additional refinement step improves recall by selecting only the best `k` neighbors. 
| +| `numThreads` | `search_params` | N | Positive Integer >0 | 1 | Number of threads to use for queries. | ## HNSW From 1e7b5c8605472e9e433ad1a5a84da86602f1933e Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Thu, 5 Oct 2023 23:19:46 -0400 Subject: [PATCH 37/57] Seeing if this fixes the devcontainers --- .../environments/bench_ann_cuda-118_arch-x86_64.yaml | 1 - .../raft-ann-bench-cpu/conda_build_config.yaml | 3 --- conda/recipes/raft-ann-bench-cpu/meta.yaml | 2 -- conda/recipes/raft-ann-bench/conda_build_config.yaml | 3 --- conda/recipes/raft-ann-bench/meta.yaml | 10 ---------- cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp | 12 ++++++------ cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h | 2 ++ cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h | 2 ++ dependencies.yaml | 7 ------- 9 files changed, 10 insertions(+), 32 deletions(-) diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index c6e298dcd2..4e33ee435f 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -29,7 +29,6 @@ dependencies: - libcusolver=11.4.1.48 - libcusparse-dev=11.7.5.86 - libcusparse=11.7.5.86 -- libfaiss-avx2=1.7.4 - nccl>=2.9.9 - ninja - nlohmann_json>=3.11.2 diff --git a/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml b/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml index 1f8ac137bf..0bd424f85b 100644 --- a/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml +++ b/conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml @@ -10,9 +10,6 @@ sysroot_version: cmake_version: - ">=3.26.4" -faiss_version: - - "1.7.4" - glog_version: - ">=0.6.0" diff --git a/conda/recipes/raft-ann-bench-cpu/meta.yaml b/conda/recipes/raft-ann-bench-cpu/meta.yaml index 7df2c60f7e..71571465c1 100644 --- a/conda/recipes/raft-ann-bench-cpu/meta.yaml +++ b/conda/recipes/raft-ann-bench-cpu/meta.yaml @@ -47,7 +47,6 @@ requirements: host: - glog {{ 
glog_version }} - matplotlib -# - faiss-cpu {{ faiss_version }} - nlohmann_json {{ nlohmann_json_version }} - python - pyyaml @@ -56,7 +55,6 @@ requirements: run: - glog {{ glog_version }} - h5py {{ h5py_version }} -# - faiss-cpu {{ faiss_version }} - matplotlib - python - pyyaml diff --git a/conda/recipes/raft-ann-bench/conda_build_config.yaml b/conda/recipes/raft-ann-bench/conda_build_config.yaml index 335ca82e89..da0b893c1d 100644 --- a/conda/recipes/raft-ann-bench/conda_build_config.yaml +++ b/conda/recipes/raft-ann-bench/conda_build_config.yaml @@ -25,9 +25,6 @@ gtest_version: glog_version: - ">=0.6.0" -faiss_version: - - "1.7.4" - h5py_version: - ">=3.8.0" diff --git a/conda/recipes/raft-ann-bench/meta.yaml b/conda/recipes/raft-ann-bench/meta.yaml index 2270460df6..1c6fa5723b 100644 --- a/conda/recipes/raft-ann-bench/meta.yaml +++ b/conda/recipes/raft-ann-bench/meta.yaml @@ -70,11 +70,6 @@ requirements: {% endif %} - glog {{ glog_version }} - nlohmann_json {{ nlohmann_json_version }} - # Temporarily ignore faiss benchmarks on CUDA 12 because packages do not exist yet -# {% if cuda_major == "11" %} -# - faiss-proc=*=cuda -# - libfaiss {{ faiss_version }} -# {% endif %} - h5py {{ h5py_version }} - benchmark - matplotlib @@ -92,11 +87,6 @@ requirements: - cudatoolkit {% endif %} - glog {{ glog_version }} - # Temporarily ignore faiss benchmarks on CUDA 12 because packages do not exist yet -# {% if cuda_major == "11" %} -# - faiss-proc=*=cuda -# - libfaiss {{ faiss_version }} -# {% endif %} - h5py {{ h5py_version }} - benchmark - glog {{ glog_version }} diff --git a/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp b/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp index f11df605ee..97d1bbf307 100644 --- a/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp +++ b/cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp @@ -116,13 +116,13 @@ std::unique_ptr> create_algo(const std::string& algo, if constexpr (std::is_same_v) { raft::bench::ann::Metric metric = 
parse_metric(distance); - if (algo == "faiss_gpu_ivf_flat") { + if (algo == "faiss_cpu_ivf_flat") { ann = make_algo(metric, dim, conf, dev_list); - } else if (algo == "faiss_gpu_ivf_pq") { + } else if (algo == "faiss_cpu_ivf_pq") { ann = make_algo(metric, dim, conf); - } else if (algo == "faiss_gpu_ivf_sq") { + } else if (algo == "faiss_cpu_ivf_sq") { ann = make_algo(metric, dim, conf); - } else if (algo == "faiss_gpu_flat") { + } else if (algo == "faiss_cpu_flat") { ann = std::make_unique>(metric, dim); } } @@ -138,11 +138,11 @@ template std::unique_ptr::AnnSearchParam> create_search_param( const std::string& algo, const nlohmann::json& conf) { - if (algo == "faiss_gpu_ivf_flat" || algo == "faiss_gpu_ivf_pq" || algo == "faiss_gpu_ivf_sq") { + if (algo == "faiss_cpu_ivf_flat" || algo == "faiss_cpu_ivf_pq" || algo == "faiss_cpu_ivf_sq") { auto param = std::make_unique::SearchParam>(); parse_search_param(conf, *param); return param; - } else if (algo == "faiss_gpu_flat") { + } else if (algo == "faiss_cpu_flat") { auto param = std::make_unique::AnnSearchParam>(); return param; } diff --git a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h index a703fa9950..028a444530 100644 --- a/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h +++ b/cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h @@ -248,6 +248,8 @@ class FaissCpuIVFPQ : public FaissCpu { void load(const std::string& file) override { this->template load_(file); } }; +// TODO: Enable this in cmake +// ref: https://github.com/rapidsai/raft/issues/1876 template class FaissCpuIVFSQ : public FaissCpu { public: diff --git a/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h b/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h index 6144f89bd4..38eeddf813 100644 --- a/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h +++ b/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h @@ -311,6 +311,8 @@ class FaissGpuIVFPQ : public FaissGpu { } }; +// TODO: Enable this in cmake +// ref: 
https://github.com/rapidsai/raft/issues/1876 template class FaissGpuIVFSQ : public FaissGpu { public: diff --git a/dependencies.yaml b/dependencies.yaml index c8bdfe6da7..b9df7d34c3 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -227,13 +227,6 @@ dependencies: - h5py>=3.8.0 - benchmark>=1.8.2 - rmm=23.10.* - specific: - - output_types: [conda, pyproject, requirements] - matrices: - - matrix: - arch: x86_64 - packages: - - libfaiss-avx2=1.7.4 nn_bench_python: common: - output_types: [conda] From 667b95c4211ba45df9914cd2e720f8acf151b410 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Fri, 6 Oct 2023 00:21:53 -0400 Subject: [PATCH 38/57] Fixing dependencies.yml --- conda/environments/bench_ann_cuda-118_arch-x86_64.yaml | 2 +- dependencies.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 4e33ee435f..98b07c2c3f 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -33,7 +33,7 @@ dependencies: - ninja - nlohmann_json>=3.11.2 - nvcc_linux-64=11.8 -- rmm=23.10.* +- rmm=23.12.* - scikit-build>=0.13.1 - sysroot_linux-64==2.17 name: bench_ann_cuda-118_arch-x86_64 diff --git a/dependencies.yaml b/dependencies.yaml index b9df7d34c3..e3ca128340 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -226,7 +226,7 @@ dependencies: - glog>=0.6.0 - h5py>=3.8.0 - benchmark>=1.8.2 - - rmm=23.10.* + - rmm=23.12.* nn_bench_python: common: - output_types: [conda] From daffaf475ff17f561d6d64ed2e4cd2f204686c6a Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Fri, 6 Oct 2023 22:04:27 -0400 Subject: [PATCH 39/57] Adding openblas to nn_bench deps --- conda/environments/bench_ann_cuda-118_arch-x86_64.yaml | 1 + dependencies.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 98b07c2c3f..051dad7eaa 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -33,6 +33,7 @@ dependencies: - ninja - nlohmann_json>=3.11.2 - nvcc_linux-64=11.8 +- openblas - rmm=23.12.* - scikit-build>=0.13.1 - sysroot_linux-64==2.17 diff --git a/dependencies.yaml b/dependencies.yaml index e3ca128340..11d14c7be9 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -227,6 +227,7 @@ dependencies: - h5py>=3.8.0 - benchmark>=1.8.2 - rmm=23.12.* + - openblas nn_bench_python: common: - output_types: [conda] From 072d43d07028bf40d7405f671e933b9e44d79ded Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Mon, 9 Oct 2023 16:44:54 +0200 Subject: [PATCH 40/57] improve benchmark settings --- cpp/bench/ann/src/common/benchmark.hpp | 6 +++++- cpp/include/raft/neighbors/cagra_types.hpp | 4 ++-- .../raft/neighbors/detail/cagra/cagra_build.cuh | 14 +++++++------- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index ab22b9715e..9e09e74369 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -182,14 +182,18 @@ void bench_search(::benchmark::State& state, } // algo is static to cache it between close search runs to save time on index loading static std::string index_file = ""; + bool created_new_algo = false; if (index.file != index_file) { current_algo.reset(); index_file = index.file; + std::cout << "resetting current_algo" << std::endl; } ANN* algo; std::unique_ptr::AnnSearchParam> search_param; try { if 
(!current_algo || (algo = dynamic_cast*>(current_algo.get())) == nullptr) { + std::cout << "Loading algo" << std::endl; + created_new_algo = true; auto ualgo = ann::create_algo( index.algo, dataset->distance(), dataset->dim(), index.build_param, index.dev_list); algo = ualgo.get(); @@ -206,7 +210,7 @@ void bench_search(::benchmark::State& state, buf distances{algo_property.query_memory_type, k * query_set_size}; buf neighbors{algo_property.query_memory_type, k * query_set_size}; - if (search_param->needs_dataset()) { + if (search_param->needs_dataset() && created_new_algo) { try { algo->set_search_dataset(dataset->base_set(algo_property.dataset_memory_type), dataset->base_set_size()); diff --git a/cpp/include/raft/neighbors/cagra_types.hpp b/cpp/include/raft/neighbors/cagra_types.hpp index 6576ad0f7a..a5a1acdf9d 100644 --- a/cpp/include/raft/neighbors/cagra_types.hpp +++ b/cpp/include/raft/neighbors/cagra_types.hpp @@ -250,10 +250,10 @@ struct index : ann::index { metric_(metric), dataset_(make_device_matrix(res, 0, 0)), dataset_pinned_(0, resource::get_cuda_stream(res), mr_huge_.get()), - // dataset_pinned_(0, resource::get_cuda_stream(res), mr_.get()), + //dataset_pinned_(0, resource::get_cuda_stream(res), mr_.get()), graph_(make_device_matrix(res, 0, 0)), graph_pinned_(0, resource::get_cuda_stream(res), mr_huge_.get()) - // graph_pinned_(0, resource::get_cuda_stream(res), mr_.get()) + //graph_pinned_(0, resource::get_cuda_stream(res), mr_.get()) { RAFT_EXPECTS(dataset.extent(0) == knn_graph.extent(0), "Dataset and knn_graph must have equal number of rows"); diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh index 80e964df57..d2f317fe49 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh @@ -59,11 +59,11 @@ void build_knn_graph(raft::resources const& res, if (!build_params) { build_params = 
ivf_pq::index_params{}; - build_params->n_lists = dataset.extent(0) < 4 * 2500 ? 4 : (uint32_t)(dataset.extent(0) / 2500); - build_params->pq_dim = raft::Pow2<8>::roundUp(dataset.extent(1) / 2); + build_params->n_lists = 16384; //dataset.extent(0) < 4 * 2500 ? 4 : (uint32_t)(dataset.extent(0) / 2500); + build_params->pq_dim = 32; //raft::Pow2<8>::roundUp(dataset.extent(1) / 2); build_params->pq_bits = 8; build_params->kmeans_trainset_fraction = dataset.extent(0) < 10000 ? 1 : 10; - build_params->kmeans_n_iters = 25; + build_params->kmeans_n_iters = 10; //25; build_params->add_data_on_build = true; } @@ -93,12 +93,12 @@ void build_knn_graph(raft::resources const& res, // if (!search_params) { search_params = ivf_pq::search_params{}; - search_params->n_probes = std::min(dataset.extent(1) * 2, build_params->n_lists); - search_params->lut_dtype = CUDA_R_8U; - search_params->internal_distance_dtype = CUDA_R_32F; + search_params->n_probes = 50; //std::min(dataset.extent(1) * 2, build_params->n_lists); + search_params->lut_dtype = CUDA_R_16F; + search_params->internal_distance_dtype = CUDA_R_16F; } const auto top_k = node_degree + 1; - uint32_t gpu_top_k = node_degree * refine_rate.value_or(2.0f); + uint32_t gpu_top_k = node_degree * 4; //refine_rate.value_or(2.0f); gpu_top_k = std::min(std::max(gpu_top_k, top_k), dataset.extent(0)); const auto num_queries = dataset.extent(0); const auto max_batch_size = 1024; From 3e9079d432946c02a6e6efb3917206b82af5e711 Mon Sep 17 00:00:00 2001 From: "Corey J. 
Nolet" Date: Thu, 12 Oct 2023 14:13:21 -0400 Subject: [PATCH 41/57] Fixing style --- .../bench_ann_cuda-118_arch-x86_64.yaml | 2 +- cpp/bench/ann/CMakeLists.txt | 6 ++---- cpp/bench/ann/src/common/benchmark.hpp | 4 ++-- cpp/include/raft/neighbors/cagra_types.hpp | 4 ++-- .../raft/neighbors/detail/cagra/cagra_build.cuh | 15 ++++++++------- 5 files changed, 15 insertions(+), 16 deletions(-) diff --git a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml index 2665ce4205..2c92ad0a99 100644 --- a/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-118_arch-x86_64.yaml @@ -37,7 +37,7 @@ dependencies: - openblas - pandas - pyyaml -- rmm=23.12.* +- rmm==23.12.* - scikit-build>=0.13.1 - sysroot_linux-64==2.17 name: bench_ann_cuda-118_arch-x86_64 diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index a3f5eaffaf..bfc9e24730 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -106,10 +106,8 @@ if(RAFT_ANN_BENCH_USE_GGNN) endif() if(RAFT_ANN_BENCH_USE_FAISS) - # We need to ensure that faiss has all the conda - # information. So we currently use the very ugly - # hammer of `link_libraries` to ensure that all - # targets in this directory and the faiss directory + # We need to ensure that faiss has all the conda information. 
So we currently use the very ugly + # hammer of `link_libraries` to ensure that all targets in this directory and the faiss directory # will have the conda includes/link dirs link_libraries($) include(cmake/thirdparty/get_faiss.cmake) diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index 9e09e74369..0a398ade1d 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -182,7 +182,7 @@ void bench_search(::benchmark::State& state, } // algo is static to cache it between close search runs to save time on index loading static std::string index_file = ""; - bool created_new_algo = false; + bool created_new_algo = false; if (index.file != index_file) { current_algo.reset(); index_file = index.file; @@ -194,7 +194,7 @@ void bench_search(::benchmark::State& state, if (!current_algo || (algo = dynamic_cast*>(current_algo.get())) == nullptr) { std::cout << "Loading algo" << std::endl; created_new_algo = true; - auto ualgo = ann::create_algo( + auto ualgo = ann::create_algo( index.algo, dataset->distance(), dataset->dim(), index.build_param, index.dev_list); algo = ualgo.get(); algo->load(index_file); diff --git a/cpp/include/raft/neighbors/cagra_types.hpp b/cpp/include/raft/neighbors/cagra_types.hpp index ed624d2f8d..ff0eb2b6c6 100644 --- a/cpp/include/raft/neighbors/cagra_types.hpp +++ b/cpp/include/raft/neighbors/cagra_types.hpp @@ -266,10 +266,10 @@ struct index : ann::index { metric_(metric), dataset_(make_device_matrix(res, 0, 0)), dataset_pinned_(0, resource::get_cuda_stream(res), mr_huge_.get()), - //dataset_pinned_(0, resource::get_cuda_stream(res), mr_.get()), + // dataset_pinned_(0, resource::get_cuda_stream(res), mr_.get()), graph_(make_device_matrix(res, 0, 0)), graph_pinned_(0, resource::get_cuda_stream(res), mr_huge_.get()) - //graph_pinned_(0, resource::get_cuda_stream(res), mr_.get()) + // graph_pinned_(0, resource::get_cuda_stream(res), mr_.get()) { 
RAFT_EXPECTS(dataset.extent(0) == knn_graph.extent(0), "Dataset and knn_graph must have equal number of rows"); diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh index e2365b14c3..b9fc83bc5b 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh @@ -59,12 +59,13 @@ void build_knn_graph(raft::resources const& res, node_degree); if (!build_params) { - build_params = ivf_pq::index_params{}; - build_params->n_lists = 16384; //dataset.extent(0) < 4 * 2500 ? 4 : (uint32_t)(dataset.extent(0) / 2500); - build_params->pq_dim = 32; //raft::Pow2<8>::roundUp(dataset.extent(1) / 2); - build_params->pq_bits = 8; + build_params = ivf_pq::index_params{}; + build_params->n_lists = + 16384; // dataset.extent(0) < 4 * 2500 ? 4 : (uint32_t)(dataset.extent(0) / 2500); + build_params->pq_dim = 32; // raft::Pow2<8>::roundUp(dataset.extent(1) / 2); + build_params->pq_bits = 8; build_params->kmeans_trainset_fraction = dataset.extent(0) < 10000 ? 
1 : 10; - build_params->kmeans_n_iters = 10; //25; + build_params->kmeans_n_iters = 10; // 25; build_params->add_data_on_build = true; } @@ -94,12 +95,12 @@ void build_knn_graph(raft::resources const& res, // if (!search_params) { search_params = ivf_pq::search_params{}; - search_params->n_probes = 50; //std::min(dataset.extent(1) * 2, build_params->n_lists); + search_params->n_probes = 50; // std::min(dataset.extent(1) * 2, build_params->n_lists); search_params->lut_dtype = CUDA_R_16F; search_params->internal_distance_dtype = CUDA_R_16F; } const auto top_k = node_degree + 1; - uint32_t gpu_top_k = node_degree * 4; //refine_rate.value_or(2.0f); + uint32_t gpu_top_k = node_degree * 4; // refine_rate.value_or(2.0f); gpu_top_k = std::min(std::max(gpu_top_k, top_k), dataset.extent(0)); const auto num_queries = dataset.extent(0); const auto max_batch_size = 1024; From b33363e6d919450cc5d305152e416fe90aa3d72c Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Thu, 19 Oct 2023 01:47:22 +0200 Subject: [PATCH 42/57] remove unnecessary changes --- README.md | 22 -- cpp/bench/ann/CMakeLists.txt | 11 +- cpp/bench/ann/src/common/benchmark.hpp | 10 +- cpp/bench/ann/src/common/conf.hpp | 8 +- cpp/bench/ann/src/common/dataset.hpp | 35 ++- cpp/bench/ann/src/raft/orig_cagra.cu | 22 -- cpp/bench/ann/src/raft/orig_cagra_wrapper.h | 276 ----------------- cpp/bench/ann/src/raft/raft_benchmark.cu | 62 ---- cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 97 +----- cpp/bench/prims/CMakeLists.txt | 98 +++++-- .../neighbors/detail/cagra/cagra_build.cuh | 19 +- cpp/test/CMakeLists.txt | 277 +++++++++++++----- .../raft-ann-bench/run/conf/deep-100M.json | 14 +- 13 files changed, 318 insertions(+), 633 deletions(-) delete mode 100644 cpp/bench/ann/src/raft/orig_cagra.cu delete mode 100644 cpp/bench/ann/src/raft/orig_cagra_wrapper.h diff --git a/README.md b/README.md index b49543829f..5b1297b63c 100755 --- a/README.md +++ b/README.md @@ -1,25 +1,3 @@ -# RAFT CAGRA vs ORIGINAL CAGRA Benchmarks - 
-This branch adds a wrapper to ANN bench to call the original (standalone) cagra code. - -## Compile - -1. Set CAGRA path in [CMakeLists.txt](https://gitlab-master.nvidia.com/tfeher/raft-fork/-/blob/cagra_pin_dataset_head/cpp/bench/ann/CMakeLists.txt#L215-L216) -2. Compile with RAFT_CARA benchmarks enabled - -## Benchmark - -1. Set optimized [graph path here](https://gitlab-master.nvidia.com/tfeher/raft-fork/-/blob/cagra_pin_dataset_head/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-100M.json#L381). This has to be generated with the original CAGAR code. -2. To run original CAGRA, set algorithm parameter to "cagra" [here](https://gitlab-master.nvidia.com/tfeher/raft-fork/-/blob/cagra_pin_dataset_head/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-100M.json#L379). To run RAFT CAGRA, use "raft_cagra" as the "algo" param. - -3. `export LD_LIBRARY_PATH=/path/to/cagra/lib:$LD_LIBRARY_PATH` - -4. run benchmark -``` -./RAFT_CAGRA_ANN_BENCH --search --overwrite --data_prefix=/data/ --benchmark_filter=cagra.dim32.*cta --benchmark_out_format=csv --benchmark_out=res_cagra.csv --override_kv=n_queries:1 /workspace1/raft/python/raft-ann-bench//src/raft-ann-bench/run/conf/deep-100M.json -``` - -# ORIGINAL RAFT README BELOW #
 RAFT: Reusable Accelerated Functions and Tools for Vector Search and More
![RAFT tech stack](img/raft-tech-stack-vss.png) diff --git a/cpp/bench/ann/CMakeLists.txt b/cpp/bench/ann/CMakeLists.txt index bfc9e24730..502f371a25 100644 --- a/cpp/bench/ann/CMakeLists.txt +++ b/cpp/bench/ann/CMakeLists.txt @@ -106,8 +106,10 @@ if(RAFT_ANN_BENCH_USE_GGNN) endif() if(RAFT_ANN_BENCH_USE_FAISS) - # We need to ensure that faiss has all the conda information. So we currently use the very ugly - # hammer of `link_libraries` to ensure that all targets in this directory and the faiss directory + # We need to ensure that faiss has all the conda + # information. So we currently use the very ugly + # hammer of `link_libraries` to ensure that all + # targets in this directory and the faiss directory # will have the conda includes/link dirs link_libraries($) include(cmake/thirdparty/get_faiss.cmake) @@ -245,14 +247,9 @@ if(RAFT_ANN_BENCH_USE_RAFT_CAGRA) PATH bench/ann/src/raft/raft_benchmark.cu $<$:bench/ann/src/raft/raft_cagra.cu> - # $<$:bench/ann/src/raft/orig_cagra.cu> LINKS raft::compiled ) - # target_compile_options(RAFT_CAGRA_ANN_BENCH PUBLIC -I/workspace/rapids/knn/cagra/include) - # target_link_options(RAFT_CAGRA_ANN_BENCH PUBLIC -L/workspace/rapids/knn/cagra/lib) - - # target_link_libraries(RAFT_CAGRA_ANN_BENCH PUBLIC cagra) endif() set(RAFT_FAISS_TARGETS faiss::faiss) diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index 0a398ade1d..4ec977700d 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -182,19 +182,15 @@ void bench_search(::benchmark::State& state, } // algo is static to cache it between close search runs to save time on index loading static std::string index_file = ""; - bool created_new_algo = false; if (index.file != index_file) { current_algo.reset(); index_file = index.file; - std::cout << "resetting current_algo" << std::endl; } ANN* algo; std::unique_ptr::AnnSearchParam> search_param; try { if (!current_algo || (algo = 
dynamic_cast*>(current_algo.get())) == nullptr) { - std::cout << "Loading algo" << std::endl; - created_new_algo = true; - auto ualgo = ann::create_algo( + auto ualgo = ann::create_algo( index.algo, dataset->distance(), dataset->dim(), index.build_param, index.dev_list); algo = ualgo.get(); algo->load(index_file); @@ -204,13 +200,14 @@ void bench_search(::benchmark::State& state, } catch (const std::exception& e) { return state.SkipWithError("Failed to create an algo: " + std::string(e.what())); } + algo->set_search_param(*search_param); const auto algo_property = parse_algo_property(algo->get_preference(), sp_json); const T* query_set = dataset->query_set(algo_property.query_memory_type); buf distances{algo_property.query_memory_type, k * query_set_size}; buf neighbors{algo_property.query_memory_type, k * query_set_size}; - if (search_param->needs_dataset() && created_new_algo) { + if (search_param->needs_dataset()) { try { algo->set_search_dataset(dataset->base_set(algo_property.dataset_memory_type), dataset->base_set_size()); @@ -221,7 +218,6 @@ void bench_search(::benchmark::State& state, return; } } - algo->set_search_param(*search_param); std::ptrdiff_t batch_offset = 0; std::size_t queries_processed = 0; diff --git a/cpp/bench/ann/src/common/conf.hpp b/cpp/bench/ann/src/common/conf.hpp index b02f1db59f..405b00a74e 100644 --- a/cpp/bench/ann/src/common/conf.hpp +++ b/cpp/bench/ann/src/common/conf.hpp @@ -113,11 +113,9 @@ class Configuration { index.name = conf.at("name"); index.algo = conf.at("algo"); index.build_param = conf.at("build_param"); - std::cout << "reading conf file" << std::endl; - index.file = conf.at("file"); - std::cout << "read conf file" << std::endl; - index.batch_size = batch_size; - index.k = k; + index.file = conf.at("file"); + index.batch_size = batch_size; + index.k = k; if (conf.contains("multigpu")) { for (auto it : conf.at("multigpu")) { diff --git a/cpp/bench/ann/src/common/dataset.hpp b/cpp/bench/ann/src/common/dataset.hpp index 
b794e9ac2d..ccc5915b3c 100644 --- a/cpp/bench/ann/src/common/dataset.hpp +++ b/cpp/bench/ann/src/common/dataset.hpp @@ -46,7 +46,7 @@ namespace raft::bench::ann { // and int8 type data. // As extensions for this benchmark, half and int data files will have suffixes .f16bin // and .ibin, respectively. -template +template class BinFile { public: BinFile(const std::string& file, @@ -83,10 +83,10 @@ class BinFile { { assert(!read_mode_); if (!fp_) { open_file_(); } - if (fwrite(&nrows, sizeof(SizeT), 1, fp_) != 1) { + if (fwrite(&nrows, sizeof(uint32_t), 1, fp_) != 1) { throw std::runtime_error("fwrite() BinFile " + file_ + " failed"); } - if (fwrite(&ndims, sizeof(SizeT), 1, fp_) != 1) { + if (fwrite(&ndims, sizeof(uint32_t), 1, fp_) != 1) { throw std::runtime_error("fwrite() BinFile " + file_ + " failed"); } @@ -134,11 +134,11 @@ class BinFile { mutable void* mapped_ptr_{nullptr}; }; -template -BinFile::BinFile(const std::string& file, - const std::string& mode, - uint32_t subset_first_row, - uint32_t subset_size) +template +BinFile::BinFile(const std::string& file, + const std::string& mode, + uint32_t subset_first_row, + uint32_t subset_size) : file_(file), read_mode_(mode == "r"), subset_first_row_(subset_first_row), @@ -161,8 +161,8 @@ BinFile::BinFile(const std::string& file, } } -template -void BinFile::open_file_() const +template +void BinFile::open_file_() const { fp_ = fopen(file_.c_str(), read_mode_ ? 
"r" : "w"); if (!fp_) { throw std::runtime_error("open BinFile failed: " + file_); } @@ -172,15 +172,15 @@ void BinFile::open_file_() const if (stat(file_.c_str(), &statbuf) != 0) { throw std::runtime_error("stat() failed: " + file_); } file_size_ = statbuf.st_size; - SizeT header[2]; - if (fread(header, sizeof(SizeT), 2, fp_) != 2) { + uint32_t header[2]; + if (fread(header, sizeof(uint32_t), 2, fp_) != 2) { throw std::runtime_error("read header of BinFile failed: " + file_); } nrows_ = header[0]; ndims_ = header[1]; size_t expected_file_size = - 2 * sizeof(SizeT) + static_cast(nrows_) * ndims_ * sizeof(T); + 2 * sizeof(uint32_t) + static_cast(nrows_) * ndims_ * sizeof(T); if (file_size_ != expected_file_size) { throw std::runtime_error("expected file size of " + file_ + " is " + std::to_string(expected_file_size) + ", however, actual size is " + @@ -208,8 +208,8 @@ void BinFile::open_file_() const } } -template -void BinFile::check_suffix_() +template +void BinFile::check_suffix_() { auto pos = file_.rfind('.'); if (pos == std::string::npos) { @@ -238,9 +238,8 @@ void BinFile::check_suffix_() throw std::runtime_error("BinFile should has .i8bin suffix: " + file_); } } else { - std::cout << "skipping suffix test" << std::endl; - // throw std::runtime_error( - // "T of BinFile should be one of float, half, int, uint8_t, or int8_t"); + throw std::runtime_error( + "T of BinFile should be one of float, half, int, uint8_t, or int8_t"); } } diff --git a/cpp/bench/ann/src/raft/orig_cagra.cu b/cpp/bench/ann/src/raft/orig_cagra.cu deleted file mode 100644 index f68d67dc22..0000000000 --- a/cpp/bench/ann/src/raft/orig_cagra.cu +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "orig_cagra_wrapper.h" - -namespace raft::bench::ann { -template class Cagra; -template class Cagra; -template class Cagra; -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/raft/orig_cagra_wrapper.h b/cpp/bench/ann/src/raft/orig_cagra_wrapper.h deleted file mode 100644 index 049a7488b7..0000000000 --- a/cpp/bench/ann/src/raft/orig_cagra_wrapper.h +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Copyright (c) 2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#pragma once - -#include -#include -#include -#include -#include -#include - -#include -// #include "cuann/ann.h" - -#include "../common/ann_types.hpp" -#include "raft_ann_bench_utils.h" -#include -#include -#include -#include -#include -#include - -namespace raft::bench::ann { - -namespace { -template -std::string get_cagra_dtype() -{ - if constexpr (std::is_same_v) { - return "float"; - } else if constexpr (std::is_same_v) { - return "uint8"; - } else if constexpr (std::is_same_v) { - return "int8"; - } else if constexpr (sizeof(T) == 2) { - return "half"; - } else { - static_assert(!std::is_same_v, "Cagra: type should be float/half/int8/uint8"); - } - return ""; // stop warning of missing return statement -} - -} // namespace - -template -class Cagra : public ANN { - public: - struct BuildParam {}; - - using typename ANN::AnnSearchParam; - struct SearchParam : public AnnSearchParam { - raft::neighbors::experimental::cagra::search_params p; - auto needs_dataset() const -> bool override { return true; } - std::string search_mode; // "single-cta", "multi-cta", or "multi-kernel" - int batch_size; - int k; - }; - - Cagra(Metric metric, int dim, const BuildParam&) : ANN(metric, dim) {} - Cagra(const Cagra&) = delete; - const Cagra& operator=(const Cagra&) = delete; - ~Cagra(); - - void build(const T* dataset, size_t nrow, cudaStream_t stream = 0) override; - - void set_search_param(const AnnSearchParam& param) override; - - void search(const T* queries, - int batch_size, - int k, - size_t* neighbors, - float* distances, - cudaStream_t stream = 0) const override; - - // to enable dataset access from GPU memory - AlgoProperty get_preference() const override - { - AlgoProperty property; - property.dataset_memory_type = MemoryType::Device; - property.query_memory_type = MemoryType::Device; - return property; - } - - void save(const std::string& file) const override; - void load(const std::string& file) override; - - void set_search_dataset(const T* dataset, size_t 
nrow) override - { - // std::cout << "Cagra set_search_dataset" << std::endl; - dataset_ = dataset; - if (nrow_ == 0) { - nrow_ = nrow; - } else if (nrow_ != nrow) { - throw std::runtime_error("inconsistent nrow between dataset and graph"); - } - }; - - private: - raft::device_resources handle_; - - void check_search_param_(SearchParam& param); - - using ANN::dim_; - SearchParam search_param_; - void* plan_{nullptr}; - - const T* dataset_{nullptr}; - size_t nrow_{0}; - INDEX_T* graph_{nullptr}; - size_t degree_{0}; - - INDEX_T* tmp_neighbors_{nullptr}; -}; - -template -Cagra::~Cagra() -{ - if (plan_) { destroy_plan(plan_); } - RAFT_CUDA_TRY_NO_THROW(cudaFree(graph_)); - RAFT_CUDA_TRY_NO_THROW(cudaFree(tmp_neighbors_)); -} - -template -void Cagra::build(const T*, size_t, cudaStream_t) -{ - throw std::runtime_error("Cagra's build() is not available now, use its tools to build index"); -} - -template -void Cagra::set_search_param(const AnnSearchParam& param) -{ - // std::cout << "Cagra set_search_param" << std::endl; - if (!dataset_ || nrow_ == 0) { throw std::runtime_error("Cagra: dataset is not loaded"); } - if (!graph_ || degree_ == 0) { throw std::runtime_error("Cagra: index is not loaded"); } - - auto new_search_param = dynamic_cast(param); - - if (plan_) { destroy_plan(plan_); } - if (tmp_neighbors_) RAFT_CUDA_TRY(cudaFree(tmp_neighbors_)); - RAFT_CUDA_TRY( - cudaMalloc(&tmp_neighbors_, sizeof(size_t) * new_search_param.batch_size * new_search_param.k)); - - search_param_ = new_search_param; - // std::cout << "Cagra creating new plan" << std::endl; - create_plan(&plan_, - get_cagra_dtype(), - 0, // team_size - search_param_.search_mode, - search_param_.k, - search_param_.p.itopk_size, - search_param_.p.search_width, - search_param_.p.min_iterations, - search_param_.p.max_iterations, - search_param_.batch_size, - 0, // load_bit_length - 0, // thread_block_size - search_param_.search_mode == "multi-cta" ? 
"hash" : "auto", // hashmap_mode - 0, // hashmap_min_bitlen - 0.5, // hashmap_max_fill_rate - nrow_, - dim_, - degree_, - dataset_, - graph_); -} - -template -void Cagra::search(const T* queries, - int batch_size, - int k, - size_t* neighbors, - float* distances, - cudaStream_t stream) const -{ - static_assert(std::is_same_v); - assert(plan_); - - if (k != search_param_.k) { - throw std::runtime_error("wrong configuration: k (" + std::to_string(k) + - ") != search_param.k (" + std::to_string(search_param_.k) + ")"); - } - if (batch_size > search_param_.batch_size) { - throw std::runtime_error("wrong configuration: batch_size (" + std::to_string(batch_size) + - ") > search_param.batch_size (" + - std::to_string(search_param_.batch_size) + ")"); - } - - // uint32_t neighbors_ptr = std::is_same::value ? tmp_neighbors_ - - // std::cout << "Cagra calling search" << std::endl; - ::search(plan_, - tmp_neighbors_, - distances, - queries, - batch_size, - 1, - 0x128394, - nullptr, - 0, - nullptr, - stream); - - // std::cout << "Cagra calling unaryop" << std::endl; - raft::linalg::unaryOp(neighbors, tmp_neighbors_, batch_size * k, raft::cast_op(), stream); - handle_.sync_stream(stream); -} - -template -void Cagra::save(const std::string& file) const -{ - FILE* fp = fopen(file.c_str(), "w"); - if (!fp) { throw std::runtime_error("fail to open " + file + " for writing"); } - - if (fwrite(&nrow_, sizeof(nrow_), 1, fp) != 1) { - throw std::runtime_error("fwrite() " + file + " failed"); - } - if (fwrite(°ree_, sizeof(degree_), 1, fp) != 1) { - throw std::runtime_error("fwrite() " + file + " failed"); - } - - size_t total = nrow_ * degree_; - auto h_graph = new INDEX_T[total]; - RAFT_CUDA_TRY(cudaMemcpy(h_graph, graph_, sizeof(*graph_) * total, cudaMemcpyDeviceToHost)); - if (fwrite(h_graph, sizeof(*h_graph), total, fp) != total) { - throw std::runtime_error("fwrite() " + file + " failed"); - } - delete[] h_graph; -} - -template -void Cagra::load(const std::string& file) -{ - // 
std::cout << "Cagra load graph" << std::endl; - FILE* fp = fopen(file.c_str(), "r"); - if (!fp) { throw std::runtime_error("fail to open " + file); } - - size_t nrow; - if (fread(&nrow, sizeof(nrow), 1, fp) != 1) { - throw std::runtime_error("fread() " + file + " failed"); - } - if (nrow_ == 0) { - nrow_ = nrow; - } else if (nrow_ != nrow) { - throw std::runtime_error("inconsistent nrow between dataset and graph"); - } - - if (fread(°ree_, sizeof(degree_), 1, fp) != 1) { - throw std::runtime_error("fread() " + file + " failed"); - } - - size_t total = nrow_ * degree_; - auto h_graph = new INDEX_T[total]; - if (fread(h_graph, sizeof(*h_graph), total, fp) != total) { - throw std::runtime_error("fread() " + file + " failed"); - } - // std::cout << "Cagra alloc device graph" << std::endl; - RAFT_CUDA_TRY(cudaMalloc(&graph_, sizeof(*graph_) * total)); - RAFT_CUDA_TRY(cudaMemcpy(graph_, h_graph, sizeof(*graph_) * total, cudaMemcpyHostToDevice)); - delete[] h_graph; -} - -} // namespace raft::bench::ann diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index 9e4f39d835..fa20c5c223 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -46,12 +46,6 @@ extern template class raft::bench::ann::RaftCagra; extern template class raft::bench::ann::RaftCagra; extern template class raft::bench::ann::RaftCagra; #endif -#ifdef RAFT_ANN_BENCH_USE_ORIG_CAGRA -#include "orig_cagra_wrapper.h" -extern template class raft::bench::ann::Cagra; -extern template class raft::bench::ann::Cagra; -extern template class raft::bench::ann::Cagra; -#endif #define JSON_DIAGNOSTICS 1 #include @@ -186,48 +180,6 @@ void parse_search_param(const nlohmann::json& conf, } } #endif -#ifdef RAFT_ANN_BENCH_USE_ORIG_CAGRA -template -void parse_build_param(const nlohmann::json& conf, - typename raft::bench::ann::Cagra::BuildParam& param) -{ -} -template -void parse_search_param(const nlohmann::json& conf, - typename 
raft::bench::ann::Cagra::SearchParam& param) -{ - if (conf.contains("itopk")) { param.p.itopk_size = conf.at("itopk"); } - if (conf.contains("search_width")) { param.p.search_width = conf.at("search_width"); } - if (conf.contains("max_iterations")) { param.p.max_iterations = conf.at("max_iterations"); } - if (conf.contains("algo")) { - if (conf.at("algo") == "single_cta") { - param.p.algo = raft::neighbors::experimental::cagra::search_algo::SINGLE_CTA; - param.search_mode = "single-cta"; - } else if (conf.at("algo") == "multi_cta") { - param.p.algo = raft::neighbors::experimental::cagra::search_algo::MULTI_CTA; - param.search_mode = "multi-cta"; - } else if (conf.at("algo") == "multi_kernel") { - param.p.algo = raft::neighbors::experimental::cagra::search_algo::MULTI_KERNEL; - param.search_mode = "multi-kernel"; - } else if (conf.at("algo") == "auto") { - param.p.algo = raft::neighbors::experimental::cagra::search_algo::AUTO; - } else { - std::string tmp = conf.at("algo"); - THROW("Invalid value for algo: %s", tmp.c_str()); - } - } - if (conf.contains("k")) { - param.k = conf.at("k"); - } else { - param.k = 10; - } - if (conf.contains("batch_size")) { - param.batch_size = conf.at("batch_size"); - } else { - param.batch_size = 10000; - }; -} -#endif template std::unique_ptr> create_algo(const std::string& algo, @@ -270,13 +222,6 @@ std::unique_ptr> create_algo(const std::string& algo, parse_build_param(conf, param); ann = std::make_unique>(metric, dim, param); } -#endif -#ifdef RAFT_ANN_BENCH_USE_ORIG_CAGRA - if (algo == "cagra") { - typename raft::bench::ann::Cagra::BuildParam param; - // parse_build_param(conf, param); - ann = std::make_unique>(metric, dim, param); - } #endif if (!ann) { throw std::runtime_error("invalid algo: '" + algo + "'"); } @@ -314,13 +259,6 @@ std::unique_ptr::AnnSearchParam> create_search parse_search_param(conf, *param); return param; } -#endif -#ifdef RAFT_ANN_BENCH_USE_ORIG_CAGRA - if (algo == "cagra") { - auto param = 
std::make_unique::SearchParam>(); - parse_search_param(conf, *param); - return param; - } #endif // else throw std::runtime_error("invalid algo: '" + algo + "'"); diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index 7736a5ab94..19c5151186 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -19,14 +19,10 @@ #include #include #include -#include #include #include #include #include -#include -#include -#include #include #include #include @@ -39,7 +35,6 @@ #include #include "../common/ann_types.hpp" -#include "../common/dataset.hpp" #include "raft_ann_bench_utils.h" #include @@ -61,8 +56,7 @@ class RaftCagra : public ANN { : ANN(metric, dim), index_params_(param), dimension_(dim), - mr_(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull) //, - // graph_(make_device_matrix(handle_, 0, 0)) + mr_(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull) { rmm::mr::set_current_device_resource(&mr_); index_params_.metric = parse_metric_type(metric); @@ -103,12 +97,9 @@ class RaftCagra : public ANN { raft::device_resources handle_; BuildParam index_params_; raft::neighbors::cagra::search_params search_params_; - // raft::device_matrix graph_; - raft::host_matrix_view graph_; std::optional> index_; int device_; int dimension_; - std::unique_ptr> graph_file_; }; template @@ -137,98 +128,20 @@ void RaftCagra::set_search_param(const AnnSearchParam& param) template void RaftCagra::set_search_dataset(const T* dataset, size_t nrow) { - auto dataset_v = raft::make_host_matrix_view(dataset, nrow, this->dim_); - index_.emplace(handle_, parse_metric_type(this->metric_), dataset_v, make_const_mdspan(graph_)); - - // index_->update_dataset(handle_, - // raft::make_host_matrix_view(dataset, nrow, - // this->dim_)); + index_->update_dataset(handle_, + raft::make_host_matrix_view(dataset, nrow, this->dim_)); } template void RaftCagra::save(const std::string& file) 
const { - // RAFT serialization - // raft::neighbors::cagra::serialize(handle_, file, *index_, false); - - // 2. Saving only knn graph - // std::ofstream of(file, std::ios::out | std::ios::binary); - // serialize_mdspan(handle_, of, index_->graph()); - // of.close(); - - // size_t degree = index_->graph_degree(); - // std::cout << "Saving knn graph" << std::endl; - // for (int i = 0; i < std::min(index_->size(), 10); i++) { - // print_vector("k", index_->graph().data_handle() + i * degree, degree, std::cout); - // } - - // 3. Orig CAGRA type of serialization - std::ofstream of(file, std::ios::out | std::ios::binary); - std::size_t size = index_->size(); - std::size_t degree = index_->graph_degree(); - - of.write(reinterpret_cast(&size), sizeof(size)); - of.write(reinterpret_cast(°ree), sizeof(degree)); - - auto graph_h = make_host_matrix(size, degree); - raft::copy(graph_h.data_handle(), - index_->graph().data_handle(), - index_->graph().size(), - resource::get_cuda_stream(handle_)); - resource::sync_stream(handle_); - - of.write(reinterpret_cast(graph_h.data_handle()), graph_h.size() * sizeof(IdxT)); - - of.close(); + raft::neighbors::cagra::serialize(handle_, file, *index_, false); } template void RaftCagra::load(const std::string& file) { - // 1. Original index saving method - // index_ = raft::neighbors::cagra::deserialize(handle_, file); - - // // 2. read only knn_graph - // std::ifstream is(file, std::ios::in | std::ios::binary); - // raft::detail::numpy_serializer::header_t header = - // raft::detail::numpy_serializer::read_header(is); is.seekg(0); /* rewind*/ graph_ = - // make_device_matrix(handle_, header.shape[0], header.shape[1]); - // deserialize_mdspan(handle_, is, graph_.view()); - // is.close(); - - // 3. 
Cagra's knn file format - // std::ifstream ifs(file, std::ios::in | std::ios::binary); - // if (!ifs) { - // throw std::runtime_error("File not exist : " + file + " (`" + __func__ + "` in " + __FILE__ + - // ")"); - // } - - // std::size_t size, degree; - - // ifs.read(reinterpret_cast(&size), sizeof(size)); - // ifs.read(reinterpret_cast(°ree), sizeof(degree)); - - // auto graph_h = make_host_matrix(size, degree); - // graph_ = make_device_matrix(handle_, size, degree); - - // for (std::size_t i = 0; i < size; i++) { - // ifs.read(reinterpret_cast(graph_h.data_handle() + i * degree), sizeof(IdxT) * degree); - // } - // ifs.close(); - - // raft::copy( - // graph_.data_handle(), graph_h.data_handle(), graph_.size(), - // resource::get_cuda_stream(handle_)); - // resource::sync_stream(handle_); - - graph_file_ = std::make_unique>(file, "r"); - size_t n_rows; - int ndims; - graph_file_->get_shape(&n_rows, &ndims); - IdxT* ptr = graph_file_->map(); - graph_ = make_host_matrix_view(ptr, (int64_t)n_rows, (int64_t)ndims); - // aind_v = make_device_matrix_view( - // indices.data_handle(), params_.n_queries, params_.k); + index_ = raft::neighbors::cagra::deserialize(handle_, file); } template diff --git a/cpp/bench/prims/CMakeLists.txt b/cpp/bench/prims/CMakeLists.txt index f9edb43ef1..5da2cd916b 100644 --- a/cpp/bench/prims/CMakeLists.txt +++ b/cpp/bench/prims/CMakeLists.txt @@ -74,47 +74,87 @@ function(ConfigureBench) endfunction() if(BUILD_PRIMS_BENCH) - # ConfigureBench( NAME CLUSTER_BENCH PATH bench/prims/cluster/kmeans_balanced.cu - # bench/prims/cluster/kmeans.cu bench/prims/main.cpp OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY ) + ConfigureBench( + NAME CORE_BENCH PATH bench/prims/core/bitset.cu bench/prims/core/copy.cu bench/prims/main.cpp + ) - # ConfigureBench( NAME TUNE_DISTANCE PATH bench/prims/distance/tune_pairwise/kernel.cu - # bench/prims/distance/tune_pairwise/bench.cu bench/prims/main.cpp ) + ConfigureBench( + NAME CLUSTER_BENCH PATH 
bench/prims/cluster/kmeans_balanced.cu bench/prims/cluster/kmeans.cu + bench/prims/main.cpp OPTIONAL LIB EXPLICIT_INSTANTIATE_ONLY + ) - # ConfigureBench( NAME DISTANCE_BENCH PATH bench/prims/distance/distance_cosine.cu - # bench/prims/distance/distance_exp_l2.cu bench/prims/distance/distance_l1.cu - # bench/prims/distance/distance_unexp_l2.cu bench/prims/distance/fused_l2_nn.cu - # bench/prims/distance/masked_nn.cu bench/prims/distance/kernels.cu bench/prims/main.cpp OPTIONAL - # LIB EXPLICIT_INSTANTIATE_ONLY ) + ConfigureBench( + NAME TUNE_DISTANCE PATH bench/prims/distance/tune_pairwise/kernel.cu + bench/prims/distance/tune_pairwise/bench.cu bench/prims/main.cpp + ) - # ConfigureBench( NAME LINALG_BENCH PATH bench/prims/linalg/add.cu - # bench/prims/linalg/map_then_reduce.cu bench/prims/linalg/matrix_vector_op.cu - # bench/prims/linalg/norm.cu bench/prims/linalg/normalize.cu - # bench/prims/linalg/reduce_cols_by_key.cu bench/prims/linalg/reduce_rows_by_key.cu - # bench/prims/linalg/reduce.cu bench/prims/main.cpp ) + ConfigureBench( + NAME + DISTANCE_BENCH + PATH + bench/prims/distance/distance_cosine.cu + bench/prims/distance/distance_exp_l2.cu + bench/prims/distance/distance_l1.cu + bench/prims/distance/distance_unexp_l2.cu + bench/prims/distance/fused_l2_nn.cu + bench/prims/distance/masked_nn.cu + bench/prims/distance/kernels.cu + bench/prims/main.cpp + OPTIONAL + LIB + EXPLICIT_INSTANTIATE_ONLY + ) - # ConfigureBench( NAME MATRIX_BENCH PATH bench/prims/matrix/argmin.cu bench/prims/matrix/gather.cu - # bench/prims/matrix/select_k.cu bench/prims/matrix/main.cpp OPTIONAL LIB - # EXPLICIT_INSTANTIATE_ONLY ) + ConfigureBench( + NAME + LINALG_BENCH + PATH + bench/prims/linalg/add.cu + bench/prims/linalg/map_then_reduce.cu + bench/prims/linalg/matrix_vector_op.cu + bench/prims/linalg/norm.cu + bench/prims/linalg/normalize.cu + bench/prims/linalg/reduce_cols_by_key.cu + bench/prims/linalg/reduce_rows_by_key.cu + bench/prims/linalg/reduce.cu + bench/prims/main.cpp + ) 
+ + ConfigureBench( + NAME + MATRIX_BENCH + PATH + bench/prims/matrix/argmin.cu + bench/prims/matrix/gather.cu + bench/prims/matrix/select_k.cu + bench/prims/matrix/main.cpp + OPTIONAL + LIB + EXPLICIT_INSTANTIATE_ONLY + ) - # ConfigureBench( NAME RANDOM_BENCH PATH bench/prims/random/make_blobs.cu - # bench/prims/random/permute.cu bench/prims/random/rng.cu bench/prims/main.cpp ) + ConfigureBench( + NAME RANDOM_BENCH PATH bench/prims/random/make_blobs.cu bench/prims/random/permute.cu + bench/prims/random/rng.cu bench/prims/main.cpp + ) - # ConfigureBench(NAME SPARSE_BENCH PATH bench/prims/sparse/convert_csr.cu bench/prims/main.cpp) + ConfigureBench(NAME SPARSE_BENCH PATH bench/prims/sparse/convert_csr.cu bench/prims/main.cpp) ConfigureBench( NAME NEIGHBORS_BENCH PATH - # bench/prims/neighbors/knn/brute_force_float_int64_t.cu - # bench/prims/neighbors/knn/brute_force_float_uint32_t.cu + bench/prims/neighbors/knn/brute_force_float_int64_t.cu + bench/prims/neighbors/knn/brute_force_float_uint32_t.cu bench/prims/neighbors/knn/cagra_float_uint32_t.cu - # bench/prims/neighbors/knn/ivf_flat_float_int64_t.cu - # bench/prims/neighbors/knn/ivf_flat_int8_t_int64_t.cu - # bench/prims/neighbors/knn/ivf_flat_uint8_t_int64_t.cu - # bench/prims/neighbors/knn/ivf_pq_float_int64_t.cu - # bench/prims/neighbors/knn/ivf_pq_int8_t_int64_t.cu - # bench/prims/neighbors/knn/ivf_pq_uint8_t_int64_t.cu - # bench/prims/neighbors/refine_float_int64_t.cu bench/prims/neighbors/refine_uint8_t_int64_t.cu + bench/prims/neighbors/knn/ivf_flat_float_int64_t.cu + bench/prims/neighbors/knn/ivf_flat_int8_t_int64_t.cu + bench/prims/neighbors/knn/ivf_flat_uint8_t_int64_t.cu + bench/prims/neighbors/knn/ivf_pq_float_int64_t.cu + bench/prims/neighbors/knn/ivf_pq_int8_t_int64_t.cu + bench/prims/neighbors/knn/ivf_pq_uint8_t_int64_t.cu + bench/prims/neighbors/refine_float_int64_t.cu + bench/prims/neighbors/refine_uint8_t_int64_t.cu bench/prims/main.cpp OPTIONAL LIB diff --git 
a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh index b9fc83bc5b..40024a3deb 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_build.cuh @@ -59,13 +59,12 @@ void build_knn_graph(raft::resources const& res, node_degree); if (!build_params) { - build_params = ivf_pq::index_params{}; - build_params->n_lists = - 16384; // dataset.extent(0) < 4 * 2500 ? 4 : (uint32_t)(dataset.extent(0) / 2500); - build_params->pq_dim = 32; // raft::Pow2<8>::roundUp(dataset.extent(1) / 2); - build_params->pq_bits = 8; + build_params = ivf_pq::index_params{}; + build_params->n_lists = dataset.extent(0) < 4 * 2500 ? 4 : (uint32_t)(dataset.extent(0) / 2500); + build_params->pq_dim = raft::Pow2<8>::roundUp(dataset.extent(1) / 2); + build_params->pq_bits = 8; build_params->kmeans_trainset_fraction = dataset.extent(0) < 10000 ? 1 : 10; - build_params->kmeans_n_iters = 10; // 25; + build_params->kmeans_n_iters = 25; build_params->add_data_on_build = true; } @@ -95,12 +94,12 @@ void build_knn_graph(raft::resources const& res, // if (!search_params) { search_params = ivf_pq::search_params{}; - search_params->n_probes = 50; // std::min(dataset.extent(1) * 2, build_params->n_lists); - search_params->lut_dtype = CUDA_R_16F; - search_params->internal_distance_dtype = CUDA_R_16F; + search_params->n_probes = std::min(dataset.extent(1) * 2, build_params->n_lists); + search_params->lut_dtype = CUDA_R_8U; + search_params->internal_distance_dtype = CUDA_R_32F; } const auto top_k = node_degree + 1; - uint32_t gpu_top_k = node_degree * 4; // refine_rate.value_or(2.0f); + uint32_t gpu_top_k = node_degree * refine_rate.value_or(2.0f); gpu_top_k = std::min(std::max(gpu_top_k, top_k), dataset.extent(0)); const auto num_queries = dataset.extent(0); const auto max_batch_size = 1024; diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 3d5900ac02..9b9b882d1d 
100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -95,9 +95,18 @@ endfunction() # * distance tests ------------------------------------------------------------------------- if(BUILD_TESTS) - # ConfigureTest( NAME CLUSTER_TEST PATH test/cluster/kmeans.cu test/cluster/kmeans_balanced.cu - # test/cluster/cluster_solvers.cu test/cluster/linkage.cu test/cluster/kmeans_find_k.cu LIB - # EXPLICIT_INSTANTIATE_ONLY ) + ConfigureTest( + NAME + CLUSTER_TEST + PATH + test/cluster/kmeans.cu + test/cluster/kmeans_balanced.cu + test/cluster/cluster_solvers.cu + test/cluster/linkage.cu + test/cluster/kmeans_find_k.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + ) ConfigureTest( NAME @@ -131,64 +140,143 @@ if(BUILD_TESTS) EXPLICIT_INSTANTIATE_ONLY ) - # ConfigureTest( NAME DISTANCE_TEST PATH test/distance/dist_adj.cu - # test/distance/dist_adj_distance_instance.cu test/distance/dist_canberra.cu - # test/distance/dist_correlation.cu test/distance/dist_cos.cu test/distance/dist_hamming.cu - # test/distance/dist_hellinger.cu test/distance/dist_inner_product.cu - # test/distance/dist_jensen_shannon.cu test/distance/dist_kl_divergence.cu - # test/distance/dist_l1.cu test/distance/dist_l2_exp.cu test/distance/dist_l2_unexp.cu - # test/distance/dist_l2_sqrt_exp.cu test/distance/dist_l_inf.cu test/distance/dist_lp_unexp.cu - # test/distance/dist_russell_rao.cu test/distance/masked_nn.cu - # test/distance/masked_nn_compress_to_bits.cu test/distance/fused_l2_nn.cu test/distance/gram.cu - # LIB EXPLICIT_INSTANTIATE_ONLY ) - - # list( APPEND EXT_HEADER_TEST_SOURCES test/ext_headers/raft_neighbors_brute_force.cu - # test/ext_headers/raft_distance_distance.cu - # test/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu - # test/ext_headers/raft_matrix_detail_select_k.cu test/ext_headers/raft_neighbors_ball_cover.cu - # test/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu - # test/ext_headers/raft_distance_fused_l2_nn.cu test/ext_headers/raft_neighbors_ivf_pq.cu - # 
test/ext_headers/raft_util_memory_pool.cpp test/ext_headers/raft_neighbors_ivf_flat.cu - # test/ext_headers/raft_core_logger.cpp test/ext_headers/raft_neighbors_refine.cu - # test/ext_headers/raft_neighbors_detail_ivf_flat_search.cu - # test/ext_headers/raft_neighbors_detail_selection_faiss.cu - # test/ext_headers/raft_linalg_detail_coalesced_reduction.cu - # test/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu - # test/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu - # test/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu ) - - # # Test that the split headers compile in isolation with: # # * - # EXT_HEADERS_TEST_COMPILED_EXPLICIT: RAFT_COMPILED, RAFT_EXPLICIT_INSTANTIATE_ONLY defined # * - # EXT_HEADERS_TEST_COMPILED_IMPLICIT: RAFT_COMPILED defined # * EXT_HEADERS_TEST_IMPLICIT: no - # macros defined. ConfigureTest( NAME EXT_HEADERS_TEST_COMPILED_EXPLICIT PATH - # ${EXT_HEADER_TEST_SOURCES} LIB EXPLICIT_INSTANTIATE_ONLY ) ConfigureTest(NAME - # EXT_HEADERS_TEST_COMPILED_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB) ConfigureTest(NAME - # EXT_HEADERS_TEST_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES}) - - # ConfigureTest(NAME LABEL_TEST PATH test/label/label.cu test/label/merge_labels.cu) - - # ConfigureTest( NAME LINALG_TEST PATH test/linalg/add.cu test/linalg/axpy.cu - # test/linalg/binary_op.cu test/linalg/cholesky_r1.cu test/linalg/coalesced_reduction.cu - # test/linalg/divide.cu test/linalg/dot.cu test/linalg/eig.cu test/linalg/eig_sel.cu - # test/linalg/gemm_layout.cu test/linalg/gemv.cu test/linalg/map.cu test/linalg/map_then_reduce.cu - # test/linalg/matrix_vector.cu test/linalg/matrix_vector_op.cu test/linalg/mean_squared_error.cu - # test/linalg/multiply.cu test/linalg/norm.cu test/linalg/normalize.cu test/linalg/power.cu - # test/linalg/randomized_svd.cu test/linalg/reduce.cu test/linalg/reduce_cols_by_key.cu - # test/linalg/reduce_rows_by_key.cu test/linalg/rsvd.cu test/linalg/sqrt.cu - # 
test/linalg/strided_reduction.cu test/linalg/subtract.cu test/linalg/svd.cu - # test/linalg/ternary_op.cu test/linalg/transpose.cu test/linalg/unary_op.cu ) - - # ConfigureTest( NAME MATRIX_TEST PATH test/matrix/argmax.cu test/matrix/argmin.cu - # test/matrix/columnSort.cu test/matrix/diagonal.cu test/matrix/gather.cu test/matrix/scatter.cu - # test/matrix/eye.cu test/matrix/linewise_op.cu test/matrix/math.cu test/matrix/matrix.cu - # test/matrix/norm.cu test/matrix/reverse.cu test/matrix/slice.cu test/matrix/triangular.cu - # test/sparse/spectral_matrix.cu LIB EXPLICIT_INSTANTIATE_ONLY ) - - # ConfigureTest(NAME MATRIX_SELECT_TEST PATH test/matrix/select_k.cu LIB - # EXPLICIT_INSTANTIATE_ONLY) - - # ConfigureTest( NAME MATRIX_SELECT_LARGE_TEST PATH test/matrix/select_large_k.cu LIB - # EXPLICIT_INSTANTIATE_ONLY ) + ConfigureTest( + NAME CORE_TEST PATH test/core/stream_view.cpp test/core/mdspan_copy.cpp LIB + EXPLICIT_INSTANTIATE_ONLY NOCUDA + ) + + ConfigureTest( + NAME + DISTANCE_TEST + PATH + test/distance/dist_adj.cu + test/distance/dist_adj_distance_instance.cu + test/distance/dist_canberra.cu + test/distance/dist_correlation.cu + test/distance/dist_cos.cu + test/distance/dist_hamming.cu + test/distance/dist_hellinger.cu + test/distance/dist_inner_product.cu + test/distance/dist_jensen_shannon.cu + test/distance/dist_kl_divergence.cu + test/distance/dist_l1.cu + test/distance/dist_l2_exp.cu + test/distance/dist_l2_unexp.cu + test/distance/dist_l2_sqrt_exp.cu + test/distance/dist_l_inf.cu + test/distance/dist_lp_unexp.cu + test/distance/dist_russell_rao.cu + test/distance/masked_nn.cu + test/distance/masked_nn_compress_to_bits.cu + test/distance/fused_l2_nn.cu + test/distance/gram.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + ) + + list( + APPEND + EXT_HEADER_TEST_SOURCES + test/ext_headers/raft_neighbors_brute_force.cu + test/ext_headers/raft_distance_distance.cu + test/ext_headers/raft_distance_detail_pairwise_matrix_dispatch.cu + 
test/ext_headers/raft_matrix_detail_select_k.cu + test/ext_headers/raft_neighbors_ball_cover.cu + test/ext_headers/raft_spatial_knn_detail_fused_l2_knn.cu + test/ext_headers/raft_distance_fused_l2_nn.cu + test/ext_headers/raft_neighbors_ivf_pq.cu + test/ext_headers/raft_util_memory_pool.cpp + test/ext_headers/raft_neighbors_ivf_flat.cu + test/ext_headers/raft_core_logger.cpp + test/ext_headers/raft_neighbors_refine.cu + test/ext_headers/raft_neighbors_detail_ivf_flat_search.cu + test/ext_headers/raft_neighbors_detail_selection_faiss.cu + test/ext_headers/raft_linalg_detail_coalesced_reduction.cu + test/ext_headers/raft_spatial_knn_detail_ball_cover_registers.cu + test/ext_headers/raft_neighbors_detail_ivf_flat_interleaved_scan.cu + test/ext_headers/raft_neighbors_detail_ivf_pq_compute_similarity.cu + ) + + # Test that the split headers compile in isolation with: + # + # * EXT_HEADERS_TEST_COMPILED_EXPLICIT: RAFT_COMPILED, RAFT_EXPLICIT_INSTANTIATE_ONLY defined + # * EXT_HEADERS_TEST_COMPILED_IMPLICIT: RAFT_COMPILED defined + # * EXT_HEADERS_TEST_IMPLICIT: no macros defined. 
+ ConfigureTest( + NAME EXT_HEADERS_TEST_COMPILED_EXPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB + EXPLICIT_INSTANTIATE_ONLY + ) + ConfigureTest(NAME EXT_HEADERS_TEST_COMPILED_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES} LIB) + ConfigureTest(NAME EXT_HEADERS_TEST_IMPLICIT PATH ${EXT_HEADER_TEST_SOURCES}) + + ConfigureTest(NAME LABEL_TEST PATH test/label/label.cu test/label/merge_labels.cu) + + ConfigureTest( + NAME + LINALG_TEST + PATH + test/linalg/add.cu + test/linalg/axpy.cu + test/linalg/binary_op.cu + test/linalg/cholesky_r1.cu + test/linalg/coalesced_reduction.cu + test/linalg/divide.cu + test/linalg/dot.cu + test/linalg/eig.cu + test/linalg/eig_sel.cu + test/linalg/gemm_layout.cu + test/linalg/gemv.cu + test/linalg/map.cu + test/linalg/map_then_reduce.cu + test/linalg/matrix_vector.cu + test/linalg/matrix_vector_op.cu + test/linalg/mean_squared_error.cu + test/linalg/multiply.cu + test/linalg/norm.cu + test/linalg/normalize.cu + test/linalg/power.cu + test/linalg/randomized_svd.cu + test/linalg/reduce.cu + test/linalg/reduce_cols_by_key.cu + test/linalg/reduce_rows_by_key.cu + test/linalg/rsvd.cu + test/linalg/sqrt.cu + test/linalg/strided_reduction.cu + test/linalg/subtract.cu + test/linalg/svd.cu + test/linalg/ternary_op.cu + test/linalg/transpose.cu + test/linalg/unary_op.cu + ) + + ConfigureTest( + NAME + MATRIX_TEST + PATH + test/matrix/argmax.cu + test/matrix/argmin.cu + test/matrix/columnSort.cu + test/matrix/diagonal.cu + test/matrix/gather.cu + test/matrix/scatter.cu + test/matrix/eye.cu + test/matrix/linewise_op.cu + test/matrix/math.cu + test/matrix/matrix.cu + test/matrix/norm.cu + test/matrix/reverse.cu + test/matrix/slice.cu + test/matrix/triangular.cu + test/sparse/spectral_matrix.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + ) + + ConfigureTest(NAME MATRIX_SELECT_TEST PATH test/matrix/select_k.cu LIB EXPLICIT_INSTANTIATE_ONLY) + + ConfigureTest( + NAME MATRIX_SELECT_LARGE_TEST PATH test/matrix/select_large_k.cu LIB EXPLICIT_INSTANTIATE_ONLY + ) 
ConfigureTest( NAME @@ -206,25 +294,62 @@ if(BUILD_TESTS) test/random/sample_without_replacement.cu ) - # ConfigureTest( NAME SOLVERS_TEST PATH test/cluster/cluster_solvers_deprecated.cu - # test/linalg/eigen_solvers.cu test/lap/lap.cu test/sparse/mst.cu LIB EXPLICIT_INSTANTIATE_ONLY ) + ConfigureTest( + NAME SOLVERS_TEST PATH test/cluster/cluster_solvers_deprecated.cu test/linalg/eigen_solvers.cu + test/lap/lap.cu test/sparse/mst.cu LIB EXPLICIT_INSTANTIATE_ONLY + ) - # ConfigureTest( NAME SPARSE_TEST PATH test/sparse/add.cu test/sparse/convert_coo.cu - # test/sparse/convert_csr.cu test/sparse/csr_row_slice.cu test/sparse/csr_to_dense.cu - # test/sparse/csr_transpose.cu test/sparse/degree.cu test/sparse/filter.cu test/sparse/norm.cu - # test/sparse/normalize.cu test/sparse/reduce.cu test/sparse/row_op.cu test/sparse/sort.cu - # test/sparse/spgemmi.cu test/sparse/symmetrize.cu ) + ConfigureTest( + NAME + SPARSE_TEST + PATH + test/sparse/add.cu + test/sparse/convert_coo.cu + test/sparse/convert_csr.cu + test/sparse/csr_row_slice.cu + test/sparse/csr_to_dense.cu + test/sparse/csr_transpose.cu + test/sparse/degree.cu + test/sparse/filter.cu + test/sparse/norm.cu + test/sparse/normalize.cu + test/sparse/reduce.cu + test/sparse/row_op.cu + test/sparse/sort.cu + test/sparse/spgemmi.cu + test/sparse/symmetrize.cu + ) - # ConfigureTest( NAME SPARSE_DIST_TEST PATH test/sparse/dist_coo_spmv.cu test/sparse/distance.cu - # test/sparse/gram.cu LIB EXPLICIT_INSTANTIATE_ONLY ) + ConfigureTest( + NAME SPARSE_DIST_TEST PATH test/sparse/dist_coo_spmv.cu test/sparse/distance.cu + test/sparse/gram.cu LIB EXPLICIT_INSTANTIATE_ONLY + ) - # ConfigureTest( NAME SPARSE_NEIGHBORS_TEST PATH test/sparse/neighbors/cross_component_nn.cu - # test/sparse/neighbors/brute_force.cu test/sparse/neighbors/knn_graph.cu LIB - # EXPLICIT_INSTANTIATE_ONLY ) + ConfigureTest( + NAME + SPARSE_NEIGHBORS_TEST + PATH + test/sparse/neighbors/cross_component_nn.cu + test/sparse/neighbors/brute_force.cu + 
test/sparse/neighbors/knn_graph.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + ) - # ConfigureTest( NAME NEIGHBORS_TEST PATH test/neighbors/knn.cu test/neighbors/fused_l2_knn.cu - # test/neighbors/tiled_knn.cu test/neighbors/haversine.cu test/neighbors/ball_cover.cu - # test/neighbors/epsilon_neighborhood.cu test/neighbors/refine.cu LIB EXPLICIT_INSTANTIATE_ONLY ) + ConfigureTest( + NAME + NEIGHBORS_TEST + PATH + test/neighbors/knn.cu + test/neighbors/fused_l2_knn.cu + test/neighbors/tiled_knn.cu + test/neighbors/haversine.cu + test/neighbors/ball_cover.cu + test/neighbors/epsilon_neighborhood.cu + test/neighbors/refine.cu + LIB + EXPLICIT_INSTANTIATE_ONLY + ) ConfigureTest( NAME diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-100M.json b/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-100M.json index a4e9edd84f..ea92a0de18 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-100M.json +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/deep-100M.json @@ -1,9 +1,9 @@ { "dataset": { "name": "deep-100M", - "base_file": "deep-1B/base.1B.fbin", + "base_file": "deep-100M/base.1B.fbin", "subset_size": 100000000, - "query_file": "deep-1B/query.public.10K.fbin", + "query_file": "deep-100M/query.public.10K.fbin", "groundtruth_neighbors_file": "deep-100M/groundtruth.neighbors.ibin", "distance": "euclidean" }, @@ -375,10 +375,10 @@ "search_result_file": "result/deep-image-96-angular/raft_ivf_pq/dimpq512-cluster1024-float-float" }, { - "name": "raft_cagra.dim32.single_cta", + "name": "raft_cagra.dim32", "algo": "raft_cagra", "build_param": {"graph_degree": 32, "intermediate_graph_degree": 48}, - "file": "/workspace1/index/cagra/deep100m_n48_fp32.opt32", + "file": "deep-100M/raft_cagra/dim32", "search_params": [ {"itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "single_cta"}, {"itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "single_cta"}, @@ -398,7 +398,7 @@ "name": "raft_cagra.dim32.multi_cta", "algo": 
"raft_cagra", "build_param": {"graph_degree": 32, "intermediate_graph_degree": 48}, - "file": "/workspace1/index/cagra/deep100m_n48_fp32.opt32", + "file": "deep-100M/raft_cagra/dim32", "search_params": [ {"itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "multi_cta"}, {"itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "multi_cta"}, @@ -416,9 +416,9 @@ }, { "name": "raft_cagra.dim32.multi_kernel", - "algo": "cagra", + "algo": "raft_cagra", "build_param": {"graph_degree": 32, "intermediate_graph_degree": 48}, - "file": "/workspace1/index/cagra/deep100m_n48_fp32.opt32", + "file": "deep-100M/raft_cagra/dim32", "search_params": [ {"itopk": 32, "search_width": 1, "max_iterations": 0, "algo": "multi_kernel"}, {"itopk": 32, "search_width": 1, "max_iterations": 32, "algo": "multi_kernel"}, From 32e7b31cd37e90bb02f5a3529389ddd92e313ed4 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Sat, 21 Oct 2023 00:04:23 +0200 Subject: [PATCH 43/57] Do specific cagra graph/dataset memory allocation in the benchmark --- .../src/common}/cuda_huge_page_resource.hpp | 0 .../ann/src/common}/cuda_pinned_resource.hpp | 0 cpp/bench/ann/src/raft/raft_benchmark.cu | 17 +++ cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 93 +++++++++++++++- cpp/include/raft/neighbors/cagra_types.hpp | 102 ++---------------- .../detail/cagra/cagra_serialize.cuh | 2 +- .../raft/neighbors/detail/cagra/utils.hpp | 34 ++++++ 7 files changed, 150 insertions(+), 98 deletions(-) rename cpp/{include/raft/neighbors => bench/ann/src/common}/cuda_huge_page_resource.hpp (100%) rename cpp/{include/raft/neighbors => bench/ann/src/common}/cuda_pinned_resource.hpp (100%) diff --git a/cpp/include/raft/neighbors/cuda_huge_page_resource.hpp b/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp similarity index 100% rename from cpp/include/raft/neighbors/cuda_huge_page_resource.hpp rename to cpp/bench/ann/src/common/cuda_huge_page_resource.hpp diff --git a/cpp/include/raft/neighbors/cuda_pinned_resource.hpp 
b/cpp/bench/ann/src/common/cuda_pinned_resource.hpp similarity index 100% rename from cpp/include/raft/neighbors/cuda_pinned_resource.hpp rename to cpp/bench/ann/src/common/cuda_pinned_resource.hpp diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index fa20c5c223..89c9307ead 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -157,6 +157,21 @@ void parse_build_param(const nlohmann::json& conf, if (conf.contains("nn_descent_niter")) { param.nn_descent_niter = conf.at("nn_descent_niter"); } } +AllocatorType parse_allocator(std::string mem_type) +{ + if (mem_type == "device") { + return AllocatorType::Device; + } else if (mem_type == "host_pinned") { + return AllocatorType::HostPinned; + } else if (mem_type == "host_huge_page") { + return AllocatorType::HostHugePage; + } + THROW( + "Invalid value for memory type %s, must be one of [\"device\", \"host_pinned\", " + "\"host_huge_page\"", + mem_type.c_str()); +} + template void parse_search_param(const nlohmann::json& conf, typename raft::bench::ann::RaftCagra::SearchParam& param) @@ -178,6 +193,8 @@ void parse_search_param(const nlohmann::json& conf, THROW("Invalid value for algo: %s", tmp.c_str()); } } + if (conf.contains("graph_mem")) { param.graph_mem = parse_allocator(conf.at("graph_mem")); } + if (conf.contains("dataset_mem")) { param.dataset_mem = parse_allocator(conf.at("dataset_mem")); } } #endif diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index 19c5151186..3f2d011bd3 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -38,8 +38,15 @@ #include "raft_ann_bench_utils.h" #include +#include "../common/cuda_huge_page_resource.hpp" +#include "../common/cuda_pinned_resource.hpp" + +#include +#include + namespace raft::bench::ann { +enum class AllocatorType { HostPinned, HostHugePage, Device }; template 
class RaftCagra : public ANN { public: @@ -47,6 +54,8 @@ class RaftCagra : public ANN { struct SearchParam : public AnnSearchParam { raft::neighbors::experimental::cagra::search_params p; + AllocatorType graph_mem = AllocatorType::Device; + AllocatorType dataset_mem = AllocatorType::Device; auto needs_dataset() const -> bool override { return true; } }; @@ -56,7 +65,11 @@ class RaftCagra : public ANN { : ANN(metric, dim), index_params_(param), dimension_(dim), - mr_(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull) + mr_(rmm::mr::get_current_device_resource(), 1024 * 1024 * 1024ull), + need_dataset_update_(true), + dataset_(make_device_matrix(handle_, 0, 0)), + graph_(make_device_matrix(handle_, 0, 0)), + graph_mem_(AllocatorType::Device) { rmm::mr::set_current_device_resource(&mr_); index_params_.metric = parse_metric_type(metric); @@ -92,14 +105,29 @@ class RaftCagra : public ANN { void load(const std::string&) override; private: + inline rmm::mr::device_memory_resource* get_mr(AllocatorType mem_type) + { + switch (mem_type) { + case (AllocatorType::HostPinned): return &mr_pinned_; + case (AllocatorType::HostHugePage): return &mr_huge_page_; + default: return rmm::mr::get_current_device_resource(); + } + } // `mr_` must go first to make sure it dies last rmm::mr::pool_memory_resource mr_; + rmm ::mr::cuda_pinned_resource mr_pinned_; + rmm ::mr::cuda_huge_page_resource mr_huge_page_; raft::device_resources handle_; + AllocatorType graph_mem_; + AllocatorType dataset_mem_; BuildParam index_params_; + bool need_dataset_update_; raft::neighbors::cagra::search_params search_params_; std::optional> index_; int device_; int dimension_; + raft::device_matrix graph_; + raft::device_matrix dataset_; }; template @@ -118,18 +146,77 @@ void RaftCagra::build(const T* dataset, size_t nrow, cudaStream_t) } } +inline std::string allocator_to_string(AllocatorType mem_type) +{ + if (mem_type == AllocatorType::Device) { + return "device"; + } else if (mem_type == 
AllocatorType::HostPinned) { + return "host_pinned"; + } else if (mem_type == AllocatorType::HostHugePage) { + return "host_huge_page"; + } + return ""; +} + template void RaftCagra::set_search_param(const AnnSearchParam& param) { auto search_param = dynamic_cast(param); search_params_ = search_param.p; + if (search_param.graph_mem != graph_mem_) { + // Move graph to correct memory space + graph_mem_ = search_param.graph_mem; + std::cout << "Moving graph to new memory space " << allocator_to_string(graph_mem_) + << std::endl; + // We create a new graph and copy to it from existing graph + auto mr = get_mr(graph_mem_); + auto new_graph = make_device_mdarray( + handle_, mr, make_extents(index_->graph().extent(0), index_->graph_degree())); + + std::cout << "new_grap " << new_graph.extent(0) << "x" << new_graph.extent(1) << std::endl; + std::cout << "graph size " << index_->graph().size() << std::endl; + raft::copy(new_graph.data_handle(), + index_->graph().data_handle(), + index_->graph().size(), + resource::get_cuda_stream(handle_)); + + index_->update_graph(handle_, make_const_mdspan(new_graph.view())); + // update_graph() only stores a view in the index. We need to keep the graph object alive. + graph_ = std::move(new_graph); + } + + if (search_param.dataset_mem != dataset_mem_) { + need_dataset_update_ = true; + dataset_mem_ = search_param.dataset_mem; + } } template void RaftCagra::set_search_dataset(const T* dataset, size_t nrow) { - index_->update_dataset(handle_, - raft::make_host_matrix_view(dataset, nrow, this->dim_)); + // It can happen that we are re-using a previous algo object which already has + // the dataset set. Check if we need update. 
+ if (index_->size() != nrow || need_dataset_update_) { + // First free up existing memory + dataset_ = make_device_matrix(handle_, 0, 0); + index_->update_dataset(handle_, make_const_mdspan(dataset_.view())); + + // Allocate space using the correcct memory resource + auto mr = get_mr(dataset_mem_); + + std::cout << "Moving dataset to new memory space " << allocator_to_string(dataset_mem_) + << std::endl; + auto input_dataset_view = make_device_matrix_view(dataset, nrow, this->dim_); + raft::neighbors::cagra::detail::copy_with_padding(handle_, dataset_, input_dataset_view, mr); + + index_->update_dataset(handle_, make_const_mdspan(dataset_.view())); + + // Ideally, instead of dataset_.view(), we should pass a strided matrix view to update. + // auto dataset_view = make_device_strided_matrix_view( + // dataset_.data_handle(), dataset_.extent(0), this->dim_, dataset_.extent(1)); + // index_->update_dataset(handle_, dataset_view); + need_dataset_update_ = false; + } } template diff --git a/cpp/include/raft/neighbors/cagra_types.hpp b/cpp/include/raft/neighbors/cagra_types.hpp index ff0eb2b6c6..e8a0b8a7bd 100644 --- a/cpp/include/raft/neighbors/cagra_types.hpp +++ b/cpp/include/raft/neighbors/cagra_types.hpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -33,12 +34,6 @@ #include #include -#include "cuda_huge_page_resource.hpp" -#include "cuda_pinned_resource.hpp" - -#include -#include - #include namespace raft::neighbors::cagra { /** @@ -189,13 +184,9 @@ struct index : ann::index { index(raft::resources const& res, raft::distance::DistanceType metric = raft::distance::DistanceType::L2Expanded) : ann::index(), - mr_(new rmm::mr::cuda_pinned_resource()), - mr_huge_(new rmm::mr::cuda_huge_page_resource()), - metric_(raft::distance::DistanceType::L2Expanded), + metric_(metric), dataset_(make_device_matrix(res, 0, 0)), - dataset_pinned_(0, resource::get_cuda_stream(res), mr_.get()), - graph_(make_device_matrix(res, 0, 0)), - graph_pinned_(0, 
resource::get_cuda_stream(res), mr_.get()) + graph_(make_device_matrix(res, 0, 0)) { } @@ -257,64 +248,16 @@ struct index : ann::index { index(raft::resources const& res, raft::distance::DistanceType metric, mdspan, row_major, data_accessor> dataset, - mdspan, row_major, graph_accessor> knn_graph, - bool graph_pinned = true, - bool data_pinned = true) + mdspan, row_major, graph_accessor> knn_graph) : ann::index(), - mr_(new rmm::mr::cuda_pinned_resource()), - mr_huge_(new rmm::mr::cuda_huge_page_resource()), metric_(metric), dataset_(make_device_matrix(res, 0, 0)), - dataset_pinned_(0, resource::get_cuda_stream(res), mr_huge_.get()), - // dataset_pinned_(0, resource::get_cuda_stream(res), mr_.get()), - graph_(make_device_matrix(res, 0, 0)), - graph_pinned_(0, resource::get_cuda_stream(res), mr_huge_.get()) - // graph_pinned_(0, resource::get_cuda_stream(res), mr_.get()) + graph_(make_device_matrix(res, 0, 0)) { RAFT_EXPECTS(dataset.extent(0) == knn_graph.extent(0), "Dataset and knn_graph must have equal number of rows"); - if (data_pinned) { - // copy with padding - int64_t aligned_dim = round_up_safe(dataset.extent(1) * sizeof(T), 16) / sizeof(T); - dataset_pinned_.resize(dataset.extent(0) * aligned_dim, resource::get_cuda_stream(res)); - resource::sync_stream(res); - - RAFT_LOG_INFO("Allocated pinned dataset"); - - memset(dataset_pinned_.data(), 0, dataset_pinned_.size() * sizeof(T)); - RAFT_CUDA_TRY(cudaMemcpy2DAsync(dataset_pinned_.data(), - sizeof(T) * aligned_dim, - dataset.data_handle(), - sizeof(T) * dataset.extent(1), - sizeof(T) * dataset.extent(1), - dataset.extent(0), - cudaMemcpyDefault, - resource::get_cuda_stream(res))); - - dataset_view_ = make_device_strided_matrix_view( - dataset_pinned_.data(), dataset.extent(0), dataset.extent(1), aligned_dim); - RAFT_LOG_INFO("CAGRA dataset strided matrix view %zux%zu, stride %zu", - static_cast(dataset_view_.extent(0)), - static_cast(dataset_view_.extent(1)), - static_cast(dataset_view_.stride(0))); - } else { 
- update_dataset(res, dataset); - } - if (graph_pinned) { - graph_pinned_.resize(knn_graph.size(), resource::get_cuda_stream(res)); - resource::sync_stream(res); - RAFT_LOG_INFO("Allocated pinned graph"); - - memset(graph_pinned_.data(), 0, sizeof(IdxT) * graph_pinned_.size()); - graph_view_ = make_device_matrix_view( - graph_pinned_.data(), knn_graph.extent(0), knn_graph.extent(1)); - raft::copy(graph_pinned_.data(), - knn_graph.data_handle(), - knn_graph.size(), - resource::get_cuda_stream(res)); - } else { - update_graph(res, knn_graph); - } + update_dataset(res, dataset); + update_graph(res, knn_graph); resource::sync_stream(res); } @@ -388,32 +331,8 @@ struct index : ann::index { void copy_padded(raft::resources const& res, mdspan, row_major, data_accessor> dataset) { - size_t padded_dim = round_up_safe(dataset.extent(1) * sizeof(T), 16) / sizeof(T); + detail::copy_with_padding(res, dataset_, dataset); - if ((dataset_.extent(0) != dataset.extent(0)) || - (static_cast(dataset_.extent(1)) != padded_dim)) { - // clear existing memory before allocating to prevent OOM errors on large datasets - if (dataset_.size()) { dataset_ = make_device_matrix(res, 0, 0); } - dataset_ = make_device_matrix(res, dataset.extent(0), padded_dim); - } - if (dataset_.extent(1) == dataset.extent(1)) { - raft::copy(dataset_.data_handle(), - dataset.data_handle(), - dataset.size(), - resource::get_cuda_stream(res)); - } else { - // copy with padding - RAFT_CUDA_TRY(cudaMemsetAsync( - dataset_.data_handle(), 0, dataset_.size() * sizeof(T), resource::get_cuda_stream(res))); - RAFT_CUDA_TRY(cudaMemcpy2DAsync(dataset_.data_handle(), - sizeof(T) * dataset_.extent(1), - dataset.data_handle(), - sizeof(T) * dataset.extent(1), - sizeof(T) * dataset.extent(1), - dataset.extent(0), - cudaMemcpyDefault, - resource::get_cuda_stream(res))); - } dataset_view_ = make_device_strided_matrix_view( dataset_.data_handle(), dataset_.extent(0), dataset.extent(1), dataset_.extent(1)); RAFT_LOG_DEBUG("CAGRA 
dataset strided matrix view %zux%zu, stride %zu", @@ -422,14 +341,9 @@ struct index : ann::index { static_cast(dataset_view_.stride(0))); } - private: - std::unique_ptr mr_; - std::unique_ptr mr_huge_; raft::distance::DistanceType metric_; raft::device_matrix dataset_; - rmm::device_uvector dataset_pinned_; raft::device_matrix graph_; - rmm::device_uvector graph_pinned_; raft::device_matrix_view dataset_view_; raft::device_matrix_view graph_view_; }; diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh index cb7b39a431..8261f637e1 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh @@ -36,7 +36,7 @@ struct check_index_layout { "paste in the new size and consider updating the serialization logic"); }; -constexpr size_t expected_size = 296; +constexpr size_t expected_size = 200; template struct check_index_layout), expected_size>; /** diff --git a/cpp/include/raft/neighbors/detail/cagra/utils.hpp b/cpp/include/raft/neighbors/detail/cagra/utils.hpp index 22cbe6bbac..5e57a9589f 100644 --- a/cpp/include/raft/neighbors/detail/cagra/utils.hpp +++ b/cpp/include/raft/neighbors/detail/cagra/utils.hpp @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include namespace raft::neighbors::cagra::detail { @@ -245,4 +247,36 @@ class host_matrix_view_from_device { device_matrix_view device_view_; T* host_ptr; }; + +// Copy matrix src to dst. pad rows with 0 if necessary to make them 16 byte aligned. 
+template +void copy_with_padding(raft::resources const& res, + raft::device_matrix& dst, + mdspan, row_major, data_accessor> src, + rmm::mr::device_memory_resource* mr = nullptr) +{ + if (!mr) { mr = rmm::mr::get_current_device_resource(); } + size_t padded_dim = round_up_safe(src.extent(1) * sizeof(T), 16) / sizeof(T); + + if ((dst.extent(0) != src.extent(0)) || (static_cast(dst.extent(1)) != padded_dim)) { + // clear existing memory before allocating to prevent OOM errors on large datasets + if (dst.size()) { dst = make_device_matrix(res, 0, 0); } + dst = make_device_mdarray(res, mr, make_extents(src.extent(0), padded_dim)); + } + if (dst.extent(1) == src.extent(1)) { + raft::copy(dst.data_handle(), src.data_handle(), src.size(), resource::get_cuda_stream(res)); + } else { + // copy with padding + RAFT_CUDA_TRY(cudaMemsetAsync( + dst.data_handle(), 0, dst.size() * sizeof(T), resource::get_cuda_stream(res))); + RAFT_CUDA_TRY(cudaMemcpy2DAsync(dst.data_handle(), + sizeof(T) * dst.extent(1), + src.data_handle(), + sizeof(T) * src.extent(1), + sizeof(T) * src.extent(1), + src.extent(0), + cudaMemcpyDefault, + resource::get_cuda_stream(res))); + } +} } // namespace raft::neighbors::cagra::detail From b223dae2fa66d1fe5a8c591dba6d82560777942d Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Mon, 6 Nov 2023 01:38:01 +0100 Subject: [PATCH 44/57] Remove debug printouts and improve docstrings --- cpp/bench/ann/src/common/cuda_huge_page_resource.hpp | 6 ++++-- cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 11 ++--------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp b/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp index c0eb6378cc..a09691d0b3 100644 --- a/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp +++ b/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp @@ -26,8 +26,10 @@ namespace rmm::mr { /** - * @brief `device_memory_resource` derived class that uses cudaMallocHost/Free for - * 
allocation/deallocation. + * @brief `device_memory_resource` derived class that uses mmap to allocate memory. + * This class enables memory allocation using huge pages. + * It is assumed that the allocated memory is directly accessible on device. This currently only + * works on GH systems. */ class cuda_huge_page_resource final : public device_memory_resource { public: diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index 9da86ba570..da348170a7 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -163,15 +163,11 @@ void RaftCagra::set_search_param(const AnnSearchParam& param) if (search_param.graph_mem != graph_mem_) { // Move graph to correct memory space graph_mem_ = search_param.graph_mem; - std::cout << "Moving graph to new memory space " << allocator_to_string(graph_mem_) - << std::endl; // We create a new graph and copy to it from existing graph auto mr = get_mr(graph_mem_); auto new_graph = make_device_mdarray( handle_, mr, make_extents(index_->graph().extent(0), index_->graph_degree())); - std::cout << "new_grap " << new_graph.extent(0) << "x" << new_graph.extent(1) << std::endl; - std::cout << "graph size " << index_->graph().size() << std::endl; raft::copy(new_graph.data_handle(), index_->graph().data_handle(), index_->graph().size(), @@ -198,11 +194,8 @@ void RaftCagra::set_search_dataset(const T* dataset, size_t nrow) dataset_ = make_device_matrix(handle_, 0, 0); index_->update_dataset(handle_, make_const_mdspan(dataset_.view())); - // Allocate space using the correcct memory resource - auto mr = get_mr(dataset_mem_); - - std::cout << "Moving dataset to new memory space " << allocator_to_string(dataset_mem_) - << std::endl; + // Allocate space using the correct memory resource. 
+ auto mr = get_mr(dataset_mem_); auto input_dataset_view = make_device_matrix_view(dataset, nrow, this->dim_); raft::neighbors::cagra::detail::copy_with_padding(handle_, dataset_, input_dataset_view, mr); From 53c6dedc4058a9fa387d6168266a4c41b0a7e45c Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Wed, 8 Nov 2023 01:14:36 +0100 Subject: [PATCH 45/57] Improve comments, errors, naming --- .../src/common/cuda_huge_page_resource.hpp | 30 ++++++++----------- .../ann/src/common/cuda_pinned_resource.hpp | 22 ++++++++++---- cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 5 ++-- 3 files changed, 32 insertions(+), 25 deletions(-) diff --git a/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp b/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp index a09691d0b3..a9092bdde2 100644 --- a/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp +++ b/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp @@ -24,14 +24,17 @@ #include -namespace rmm::mr { +namespace raft::mr { /** * @brief `device_memory_resource` derived class that uses mmap to allocate memory. * This class enables memory allocation using huge pages. * It is assumed that the allocated memory is directly accessible on device. This currently only * works on GH systems. 
+ * + * TODO(tfeher): consider improving or removing this helper once we made progress with + * https://github.com/rapidsai/raft/issues/1819 */ -class cuda_huge_page_resource final : public device_memory_resource { +class cuda_huge_page_resource final : public rmm::mr::device_memory_resource { public: cuda_huge_page_resource() = default; ~cuda_huge_page_resource() override = default; @@ -68,19 +71,14 @@ class cuda_huge_page_resource final : public device_memory_resource { * @param bytes The size, in bytes, of the allocation * @return void* Pointer to the newly allocated memory */ - void* do_allocate(std::size_t bytes, cuda_stream_view) override + void* do_allocate(std::size_t bytes, rmm::cuda_stream_view) override { void* _addr{nullptr}; _addr = mmap(NULL, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (_addr == MAP_FAILED) { - // RAFT_LOG_ERROR("mmap failed"); - - exit(-1); - } + if (_addr == MAP_FAILED) { RAFT_FAIL("huge_page_resource::MAP FAILED"); } if (madvise(_addr, bytes, MADV_HUGEPAGE) == -1) { - // RAFT_LOG_ERROR("madvise"); munmap(_addr, bytes); - exit(-1); + RAFT_FAIL("huge_page_resource::madvise MADV_HUGEPAGE"); } memset(_addr, 0, bytes); return _addr; @@ -95,12 +93,9 @@ class cuda_huge_page_resource final : public device_memory_resource { * * @param p Pointer to be deallocated */ - void do_deallocate(void* ptr, std::size_t size, cuda_stream_view) override + void do_deallocate(void* ptr, std::size_t size, rmm::cuda_stream_view) override { - if (munmap(ptr, size) == -1) { - // RAFT_LOG_ERROR("munmap"); - exit(-1); - } + if (munmap(ptr, size) == -1) { RAFT_FAIL("huge_page_resource::munmap"); } } /** @@ -127,7 +122,8 @@ class cuda_huge_page_resource final : public device_memory_resource { * * @return std::pair contaiing free_size and total_size of memory */ - [[nodiscard]] std::pair do_get_mem_info(cuda_stream_view) const override + [[nodiscard]] std::pair do_get_mem_info( + rmm::cuda_stream_view) const override { std::size_t 
free_size{}; std::size_t total_size{}; @@ -135,4 +131,4 @@ class cuda_huge_page_resource final : public device_memory_resource { return std::make_pair(free_size, total_size); } }; -} // namespace rmm::mr \ No newline at end of file +} // namespace raft::mr \ No newline at end of file diff --git a/cpp/bench/ann/src/common/cuda_pinned_resource.hpp b/cpp/bench/ann/src/common/cuda_pinned_resource.hpp index 2d0cb602be..288c52b8ae 100644 --- a/cpp/bench/ann/src/common/cuda_pinned_resource.hpp +++ b/cpp/bench/ann/src/common/cuda_pinned_resource.hpp @@ -22,12 +22,21 @@ #include -namespace rmm::mr { +namespace raft::mr { /** * @brief `device_memory_resource` derived class that uses cudaMallocHost/Free for * allocation/deallocation. + * + * This is almost the same as rmm::mr::host::pinned_memory_resource, but it has + * device_memory_resource as base class. Pinned memory can be accessed from device, + * and using this allocator we can create device_mdarray backed by pinned allocator. + * + * TODO(tfeher): it would be preferred to just rely on the existing allocator from rmm + * (pinned_memory_resource), but that is incompatible with the container_policy class + * for device matrix, because the latter expects a device_memory_resource. 
We shall + * revise this once we progress with Issue https://github.com/rapidsai/raft/issues/1819 */ -class cuda_pinned_resource final : public device_memory_resource { +class cuda_pinned_resource final : public rmm::mr::device_memory_resource { public: cuda_pinned_resource() = default; ~cuda_pinned_resource() override = default; @@ -64,7 +73,7 @@ class cuda_pinned_resource final : public device_memory_resource { * @param bytes The size, in bytes, of the allocation * @return void* Pointer to the newly allocated memory */ - void* do_allocate(std::size_t bytes, cuda_stream_view) override + void* do_allocate(std::size_t bytes, rmm::cuda_stream_view) override { void* ptr{nullptr}; RMM_CUDA_TRY_ALLOC(cudaMallocHost(&ptr, bytes)); @@ -80,7 +89,7 @@ class cuda_pinned_resource final : public device_memory_resource { * * @param p Pointer to be deallocated */ - void do_deallocate(void* ptr, std::size_t, cuda_stream_view) override + void do_deallocate(void* ptr, std::size_t, rmm::cuda_stream_view) override { RMM_ASSERT_CUDA_SUCCESS(cudaFreeHost(ptr)); } @@ -109,7 +118,8 @@ class cuda_pinned_resource final : public device_memory_resource { * * @return std::pair contaiing free_size and total_size of memory */ - [[nodiscard]] std::pair do_get_mem_info(cuda_stream_view) const override + [[nodiscard]] std::pair do_get_mem_info( + rmm::cuda_stream_view) const override { std::size_t free_size{}; std::size_t total_size{}; @@ -117,4 +127,4 @@ class cuda_pinned_resource final : public device_memory_resource { return std::make_pair(free_size, total_size); } }; -} // namespace rmm::mr \ No newline at end of file +} // namespace raft::mr \ No newline at end of file diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index f81c67a5d8..0763d9381c 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -124,8 +124,8 @@ class RaftCagra : public ANN { default: return 
rmm::mr::get_current_device_resource(); } } - rmm ::mr::cuda_pinned_resource mr_pinned_; - rmm ::mr::cuda_huge_page_resource mr_huge_page_; + raft ::mr::cuda_pinned_resource mr_pinned_; + raft ::mr::cuda_huge_page_resource mr_huge_page_; raft::device_resources handle_; AllocatorType graph_mem_; AllocatorType dataset_mem_; @@ -216,6 +216,7 @@ void RaftCagra::set_search_dataset(const T* dataset, size_t nrow) index_->update_dataset(handle_, make_const_mdspan(dataset_.view())); // Ideally, instead of dataset_.view(), we should pass a strided matrix view to update. + // See Issue https://github.com/rapidsai/raft/issues/1972 for details. // auto dataset_view = make_device_strided_matrix_view( // dataset_.data_handle(), dataset_.extent(0), this->dim_, dataset_.extent(1)); // index_->update_dataset(handle_, dataset_view); From e355ff0bd1c28f6f3ebb9637228ad7afc9262219 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Wed, 8 Nov 2023 13:56:28 +0100 Subject: [PATCH 46/57] update tuning guide --- cpp/bench/ann/src/common/cuda_huge_page_resource.hpp | 4 +--- cpp/bench/ann/src/common/cuda_pinned_resource.hpp | 2 +- docs/source/ann_benchmarks_param_tuning.md | 4 ++++ 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp b/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp index a9092bdde2..9132db7c04 100644 --- a/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp +++ b/cpp/bench/ann/src/common/cuda_huge_page_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,8 +15,6 @@ */ #pragma once -// #include - #include #include #include diff --git a/cpp/bench/ann/src/common/cuda_pinned_resource.hpp b/cpp/bench/ann/src/common/cuda_pinned_resource.hpp index 288c52b8ae..28ca691f86 100644 --- a/cpp/bench/ann/src/common/cuda_pinned_resource.hpp +++ b/cpp/bench/ann/src/common/cuda_pinned_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md index cdc7958714..7e85684fbd 100644 --- a/docs/source/ann_benchmarks_param_tuning.md +++ b/docs/source/ann_benchmarks_param_tuning.md @@ -59,6 +59,10 @@ CAGRA uses a graph-based index, which creates an intermediate, approximate kNN g | `search_width` | `search_param` | N | Positive Integer >0 | 1 | Number of graph nodes to select as the starting point for the search in each iteration. | | `max_iterations` | `search_param` | N | Integer >=0 | 0 | Upper limit of search iterations. Auto select when 0. | | `algo` | `search_param` | N | string | "auto" | Algorithm to use for search. Possible values: {"auto", "single_cta", "multi_cta", "multi_kernel"} | +| `graph_mem` | `search_param` | N | string | "device" | Memory type to store gaph. Must be one of {"device", "host_pinned", "host_huge_page"}. | +| `dataset_mem` | `search_param` | N | string | "device" | Memory type to store dataset. Must be one of {"device", "host_pinned", "host_huge_page"}. | + +The `graph_mem` or `device_mem` options can be useful for large datasets that do not fit the device memory. Setting `device_mem` other than `device` has negative impact on search speed. 
Using `host_huge_page` option is only supported on systems with Heterogeneous Memory Managment or on platforms that natively support GPU access to system allocated memory, for example Grace Hopper. To fine tune CAGRA index building we can customize IVF-PQ index builder options using the following settings. These take effect only if `graph_build_algo == "IVF_PQ"`. It is recommended to experiment using a separate IVF-PQ index to find the config that gives the largest QPS for large batch. Recall does not need to be very high, since CAGRA further optimizes the kNN neighbor graph. Some of the default values are derived from the dataset size which is assumed to be [n_vecs, dim]. From 0cb632a2965bb167660733b9c99c1bee708dde23 Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Wed, 8 Nov 2023 13:59:11 +0100 Subject: [PATCH 47/57] corret tuning guide --- docs/source/ann_benchmarks_param_tuning.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md index 7e85684fbd..f34716f70b 100644 --- a/docs/source/ann_benchmarks_param_tuning.md +++ b/docs/source/ann_benchmarks_param_tuning.md @@ -62,7 +62,7 @@ CAGRA uses a graph-based index, which creates an intermediate, approximate kNN g | `graph_mem` | `search_param` | N | string | "device" | Memory type to store gaph. Must be one of {"device", "host_pinned", "host_huge_page"}. | | `dataset_mem` | `search_param` | N | string | "device" | Memory type to store dataset. Must be one of {"device", "host_pinned", "host_huge_page"}. | -The `graph_mem` or `device_mem` options can be useful for large datasets that do not fit the device memory. Setting `device_mem` other than `device` has negative impact on search speed. Using `host_huge_page` option is only supported on systems with Heterogeneous Memory Managment or on platforms that natively support GPU access to system allocated memory, for example Grace Hopper. 
+The `graph_mem` or `device_mem` options can be useful for large datasets that do not fit the device memory. Setting `dataset_mem` other than `device` has negative impact on search speed. Using `host_huge_page` option is only supported on systems with Heterogeneous Memory Managment or on platforms that natively support GPU access to system allocated memory, for example Grace Hopper. To fine tune CAGRA index building we can customize IVF-PQ index builder options using the following settings. These take effect only if `graph_build_algo == "IVF_PQ"`. It is recommended to experiment using a separate IVF-PQ index to find the config that gives the largest QPS for large batch. Recall does not need to be very high, since CAGRA further optimizes the kNN neighbor graph. Some of the default values are derived from the dataset size which is assumed to be [n_vecs, dim]. From 8ad652c33997d7f550211a1315786efde62c66d1 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Thu, 9 Nov 2023 15:16:43 -0500 Subject: [PATCH 48/57] Using _memory_type for consistencyy --- cpp/bench/ann/src/raft/raft_benchmark.cu | 8 ++++++-- docs/source/ann_benchmarks_param_tuning.md | 6 +++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu index a8f6849168..a6393aa1cc 100644 --- a/cpp/bench/ann/src/raft/raft_benchmark.cu +++ b/cpp/bench/ann/src/raft/raft_benchmark.cu @@ -251,8 +251,12 @@ void parse_search_param(const nlohmann::json& conf, THROW("Invalid value for algo: %s", tmp.c_str()); } } - if (conf.contains("graph_mem")) { param.graph_mem = parse_allocator(conf.at("graph_mem")); } - if (conf.contains("dataset_mem")) { param.dataset_mem = parse_allocator(conf.at("dataset_mem")); } + if (conf.contains("graph_memory_type")) { + param.graph_mem = parse_allocator(conf.at("graph_memory_type")); + } + if (conf.contains("dataset_memory_type")) { + param.dataset_mem = 
parse_allocator(conf.at("dataset_memory_type")); + } } #endif diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md index f34716f70b..1d73d8ae19 100644 --- a/docs/source/ann_benchmarks_param_tuning.md +++ b/docs/source/ann_benchmarks_param_tuning.md @@ -59,10 +59,10 @@ CAGRA uses a graph-based index, which creates an intermediate, approximate kNN g | `search_width` | `search_param` | N | Positive Integer >0 | 1 | Number of graph nodes to select as the starting point for the search in each iteration. | | `max_iterations` | `search_param` | N | Integer >=0 | 0 | Upper limit of search iterations. Auto select when 0. | | `algo` | `search_param` | N | string | "auto" | Algorithm to use for search. Possible values: {"auto", "single_cta", "multi_cta", "multi_kernel"} | -| `graph_mem` | `search_param` | N | string | "device" | Memory type to store gaph. Must be one of {"device", "host_pinned", "host_huge_page"}. | -| `dataset_mem` | `search_param` | N | string | "device" | Memory type to store dataset. Must be one of {"device", "host_pinned", "host_huge_page"}. | +| `graph_memory_type` | `search_param` | N | string | "device" | Memory type to store gaph. Must be one of {"device", "host_pinned", "host_huge_page"}. | +| `dataset_memory_type` | `search_param` | N | string | "device" | Memory type to store dataset. Must be one of {"device", "host_pinned", "host_huge_page"}. | -The `graph_mem` or `device_mem` options can be useful for large datasets that do not fit the device memory. Setting `dataset_mem` other than `device` has negative impact on search speed. Using `host_huge_page` option is only supported on systems with Heterogeneous Memory Managment or on platforms that natively support GPU access to system allocated memory, for example Grace Hopper. +The `graph_mem` or `device_mem` options can be useful for large datasets that do not fit the device memory. 
Setting `dataset_mem` other than `device` has negative impact on search speed. Using `host_huge_page` option is only supported on systems with Heterogeneous Memory Management or on platforms that natively support GPU access to system allocated memory, for example Grace Hopper. To fine tune CAGRA index building we can customize IVF-PQ index builder options using the following settings. These take effect only if `graph_build_algo == "IVF_PQ"`. It is recommended to experiment using a separate IVF-PQ index to find the config that gives the largest QPS for large batch. Recall does not need to be very high, since CAGRA further optimizes the kNN neighbor graph. Some of the default values are derived from the dataset size which is assumed to be [n_vecs, dim]. From 313876b48ffbe37c94e64fbfe0e97877c20f457a Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Thu, 9 Nov 2023 15:18:09 -0500 Subject: [PATCH 49/57] Adding correct link for hnsw params --- docs/source/ann_benchmarks_param_tuning.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md index 1d73d8ae19..0552dc0d56 100644 --- a/docs/source/ann_benchmarks_param_tuning.md +++ b/docs/source/ann_benchmarks_param_tuning.md @@ -80,6 +80,7 @@ To fine tune CAGRA index building we can customize IVF-PQ index builder options | `ivf_pq_search_refine_ratio` | `build_params` | N| Positive Number >=0 | 2 | `refine_ratio * k` nearest neighbors are queried from the index initially and an additional refinement step improves recall by selecting only the best `k` neighbors. 
| Alternatively, if `graph_build_algo == "NN_DESCENT"`, then we can customize the following parameters + | Parameter | Type | Required | Data Type | Default | Description | |-----------------------------|----------------|----------|----------------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | `nn_descent_niter` | `build_param` | N | Positive Integer>0 | 20 | Number of NN Descent iterations. | @@ -167,4 +168,4 @@ Use FAISS IVF-PQ index on CPU | `ef` | `search_param` | Y | Positive Integer >0 | | Size of the dynamic list for the nearest neighbors used for search. Higher value leads to more accurate but slower search. Cannot be lower than `k`. | | `numThreads` | `search_params` | N | Positive Integer >0 | 1 | Number of threads to use for queries. | -Please refer to [HNSW algorithm parameters guide] from `hnswlib` to learn more about these arguments. \ No newline at end of file +Please refer to [HNSW algorithm parameters guide](https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md) from `hnswlib` to learn more about these arguments. 
\ No newline at end of file From ca2f17a57760e39ae1289c846b7fa8c70bd7032c Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Tue, 14 Nov 2023 22:13:27 +0100 Subject: [PATCH 50/57] Fix merge error --- cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h index 8edae1c005..f5741dee17 100644 --- a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h +++ b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h @@ -206,14 +206,14 @@ void parse_build_param(const nlohmann::json& conf, } } -AllocatorType parse_allocator(std::string mem_type) +raft::bench::ann::AllocatorType parse_allocator(std::string mem_type) { if (mem_type == "device") { - return AllocatorType::Device; + return raft::bench::ann::AllocatorType::Device; } else if (mem_type == "host_pinned") { - return AllocatorType::HostPinned; + return raft::bench::ann::AllocatorType::HostPinned; } else if (mem_type == "host_huge_page") { - return AllocatorType::HostHugePage; + return raft::bench::ann::AllocatorType::HostHugePage; } THROW( "Invalid value for memory type %s, must be one of [\"device\", \"host_pinned\", " From 6f9a39c209946d39926b74cdc66b83026e39f93e Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Wed, 15 Nov 2023 00:58:24 +0100 Subject: [PATCH 51/57] Resolve CAGAR bench parameter name conflict --- cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h | 4 ++-- docs/source/ann_benchmarks_param_tuning.md | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h index f5741dee17..1eb0e53cc5 100644 --- a/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h +++ b/cpp/bench/ann/src/raft/raft_ann_bench_param_parser.h @@ -245,8 +245,8 @@ void parse_search_param(const nlohmann::json& conf, if 
(conf.contains("graph_memory_type")) { param.graph_mem = parse_allocator(conf.at("graph_memory_type")); } - if (conf.contains("dataset_memory_type")) { - param.dataset_mem = parse_allocator(conf.at("dataset_memory_type")); + if (conf.contains("internal_dataset_memory_type")) { + param.dataset_mem = parse_allocator(conf.at("internal_dataset_memory_type")); } } #endif diff --git a/docs/source/ann_benchmarks_param_tuning.md b/docs/source/ann_benchmarks_param_tuning.md index 6684f0426b..dd74f030ad 100644 --- a/docs/source/ann_benchmarks_param_tuning.md +++ b/docs/source/ann_benchmarks_param_tuning.md @@ -53,16 +53,16 @@ IVF-pq is an inverted-file index, which partitions the vectors into a series of | `graph_degree` | `build_param` | N | Positive Integer >0 | 64 | Degree of the final kNN graph index. | | `intermediate_graph_degree` | `build_param` | N | Positive Integer >0 | 128 | Degree of the intermediate kNN graph. | | `graph_build_algo` | `build_param` | N | ["IVF_PQ", "NN_DESCENT"] | "IVF_PQ" | Algorithm to use for search | -| `dataset_memory_type` | `build_param` | N | ["device", "host", "mmap"] | "device" | What memory type should the dataset reside? | +| `dataset_memory_type` | `build_param` | N | ["device", "host", "mmap"] | "device" | What memory type should the dataset reside while constructing the index? | | `query_memory_type` | `search_params` | N | ["device", "host", "mmap"] | "device | What memory type should the queries reside? | | `itopk` | `search_wdith` | N | Positive Integer >0 | 64 | Number of intermediate search results retained during the search. Higher values improve search accuracy at the cost of speed. | | `search_width` | `search_param` | N | Positive Integer >0 | 1 | Number of graph nodes to select as the starting point for the search in each iteration. | | `max_iterations` | `search_param` | N | Integer >=0 | 0 | Upper limit of search iterations. Auto select when 0. 
| | `algo` | `search_param` | N | string | "auto" | Algorithm to use for search. Possible values: {"auto", "single_cta", "multi_cta", "multi_kernel"} | | `graph_memory_type` | `search_param` | N | string | "device" | Memory type to store gaph. Must be one of {"device", "host_pinned", "host_huge_page"}. | -| `dataset_memory_type` | `search_param` | N | string | "device" | Memory type to store dataset. Must be one of {"device", "host_pinned", "host_huge_page"}. | +| `internal_dataset_memory_type` | `search_param` | N | string | "device" | Memory type to store dataset in the index. Must be one of {"device", "host_pinned", "host_huge_page"}. | -The `graph_mem` or `device_mem` options can be useful for large datasets that do not fit the device memory. Setting `dataset_mem` other than `device` has negative impact on search speed. Using `host_huge_page` option is only supported on systems with Heterogeneous Memory Management or on platforms that natively support GPU access to system allocated memory, for example Grace Hopper. +The `graph_memory_type` or `internal_dataset_memory_type` options can be useful for large datasets that do not fit the device memory. Setting `internal_dataset_memory_type` other than `device` has negative impact on search speed. Using `host_huge_page` option is only supported on systems with Heterogeneous Memory Management or on platforms that natively support GPU access to system allocated memory, for example Grace Hopper. To fine tune CAGRA index building we can customize IVF-PQ index builder options using the following settings. These take effect only if `graph_build_algo == "IVF_PQ"`. It is recommended to experiment using a separate IVF-PQ index to find the config that gives the largest QPS for large batch. Recall does not need to be very high, since CAGRA further optimizes the kNN neighbor graph. Some of the default values are derived from the dataset size which is assumed to be [n_vecs, dim]. 
From 6ca2a3d5d56d9223b7ca5153e7c2844f91ae13ef Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Wed, 15 Nov 2023 01:05:15 +0100 Subject: [PATCH 52/57] Update dataset memory allocation according to changed order of set_search_params/set_dataset --- cpp/bench/ann/src/raft/raft_cagra_wrapper.h | 39 +++++++++++++-------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h index a5ed9a73ad..a3e481ec5a 100644 --- a/cpp/bench/ann/src/raft/raft_cagra_wrapper.h +++ b/cpp/bench/ann/src/raft/raft_cagra_wrapper.h @@ -80,7 +80,9 @@ class RaftCagra : public ANN { need_dataset_update_(true), dataset_(make_device_matrix(handle_, 0, 0)), graph_(make_device_matrix(handle_, 0, 0)), - graph_mem_(AllocatorType::Device) + input_dataset_v_(nullptr, 0, 0), + graph_mem_(AllocatorType::Device), + dataset_mem_(AllocatorType::Device) { index_params_.cagra_params.metric = parse_metric_type(metric); index_params_.ivf_pq_build_params->metric = parse_metric_type(metric); @@ -138,6 +140,7 @@ class RaftCagra : public ANN { int dimension_; raft::device_matrix graph_; raft::device_matrix dataset_; + raft::device_matrix_view input_dataset_v_; }; template @@ -178,6 +181,7 @@ void RaftCagra::set_search_param(const AnnSearchParam& param) if (search_param.graph_mem != graph_mem_) { // Move graph to correct memory space graph_mem_ = search_param.graph_mem; + RAFT_LOG_INFO("moving graph to new memory space: %s", allocator_to_string(graph_mem_).c_str()); // We create a new graph and copy to it from existing graph auto mr = get_mr(graph_mem_); auto new_graph = make_device_mdarray( @@ -193,26 +197,19 @@ void RaftCagra::set_search_param(const AnnSearchParam& param) graph_ = std::move(new_graph); } - if (search_param.dataset_mem != dataset_mem_) { - need_dataset_update_ = true; - dataset_mem_ = search_param.dataset_mem; - } -} + if (search_param.dataset_mem != dataset_mem_ || need_dataset_update_) { + 
dataset_mem_ = search_param.dataset_mem; -template -void RaftCagra::set_search_dataset(const T* dataset, size_t nrow) -{ - // It can happen that we are re-using a previous algo object which already has - // the dataset set. Check if we need update. - if (index_->size() != nrow || need_dataset_update_) { // First free up existing memory dataset_ = make_device_matrix(handle_, 0, 0); index_->update_dataset(handle_, make_const_mdspan(dataset_.view())); // Allocate space using the correct memory resource. - auto mr = get_mr(dataset_mem_); - auto input_dataset_view = make_device_matrix_view(dataset, nrow, this->dim_); - raft::neighbors::cagra::detail::copy_with_padding(handle_, dataset_, input_dataset_view, mr); + RAFT_LOG_INFO("moving dataset to new memory space: %s", + allocator_to_string(dataset_mem_).c_str()); + + auto mr = get_mr(dataset_mem_); + raft::neighbors::cagra::detail::copy_with_padding(handle_, dataset_, input_dataset_v_, mr); index_->update_dataset(handle_, make_const_mdspan(dataset_.view())); @@ -225,6 +222,18 @@ void RaftCagra::set_search_dataset(const T* dataset, size_t nrow) } } +template +void RaftCagra::set_search_dataset(const T* dataset, size_t nrow) +{ + // It can happen that we are re-using a previous algo object which already has + // the dataset set. Check if we need update. 
+ if (static_cast(input_dataset_v_.extent(0)) != nrow || + input_dataset_v_.data_handle() != dataset) { + input_dataset_v_ = make_device_matrix_view(dataset, nrow, this->dim_); + need_dataset_update_ = true; + } +} + template void RaftCagra::save(const std::string& file) const { From 0c44d8418d28cf7e09b0825e47d31a2afa3c049b Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Wed, 15 Nov 2023 01:07:48 +0100 Subject: [PATCH 53/57] Add benchmark arg to control log level --- cpp/bench/ann/src/common/benchmark.hpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index 7db5eab194..1926528232 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -21,6 +21,7 @@ #include "util.hpp" #include +#include #include #include @@ -572,6 +573,8 @@ inline auto run_main(int argc, char** argv) -> int std::string mode = "latency"; std::string threads_arg_txt = ""; std::vector threads = {1, -1}; // min_thread, max_thread + std::string log_level_str = ""; + int raft_log_level = raft::logger::get(RAFT_NAME).get_level(); kv_series override_kv{}; char arg0_default[] = "benchmark"; // NOLINT @@ -596,7 +599,12 @@ inline auto run_main(int argc, char** argv) -> int parse_string_flag(argv[i], "--index_prefix", index_prefix) || parse_string_flag(argv[i], "--mode", mode) || parse_string_flag(argv[i], "--override_kv", new_override_kv) || - parse_string_flag(argv[i], "--threads", threads_arg_txt)) { + parse_string_flag(argv[i], "--threads", threads_arg_txt) || + parse_string_flag(argv[i], "--raft_log_level", log_level_str)) { + if (!log_level_str.empty()) { + raft_log_level = std::stoi(log_level_str); + log_level_str = ""; + } if (!threads_arg_txt.empty()) { auto threads_arg = split(threads_arg_txt, ':'); threads[0] = std::stoi(threads_arg[0]); @@ -625,6 +633,8 @@ inline auto run_main(int argc, char** argv) -> int } } + 
raft::logger::get(RAFT_NAME).set_level(raft_log_level); + Objective metric_objective = Objective::LATENCY; if (mode == "throughput") { metric_objective = Objective::THROUGHPUT; } From fb7847c64af7efe6908e860ea9c503618509799d Mon Sep 17 00:00:00 2001 From: Tamas Bela Feher Date: Thu, 16 Nov 2023 00:26:40 +0100 Subject: [PATCH 54/57] Add raft_log_level arg to python wrapper --- .../src/raft-ann-bench/run/__main__.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py index a33467b554..04abefdf7b 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py @@ -88,6 +88,7 @@ def run_build_and_search( batch_size, search_threads, mode="throughput", + raft_log_level=4, ): for executable, ann_executable_path, algo in executables_to_run.keys(): # Need to write temporary configuration @@ -117,6 +118,7 @@ def run_build_and_search( "--benchmark_counters_tabular=true", "--benchmark_out=" + f"{os.path.join(build_folder, f'{algo}.json')}", + "--raft_log_level=" + raft_log_level, ] if force: cmd = cmd + ["--overwrite"] @@ -150,6 +152,7 @@ def run_build_and_search( "--mode=%s" % mode, "--benchmark_out=" + f"{os.path.join(search_folder, f'{algo}.json')}", + "--raft_log_level=" + raft_log_level, ] if force: cmd = cmd + ["--overwrite"] @@ -294,6 +297,16 @@ def main(): "the command.", action="store_true", ) + parser.add_argument( + "--raft-log-level", + type=int, + help="Log level, possible values are [0,1,2,3,4,5,6]. These levels " + "correspond to [OFF, ERROR, WARN, INFO, DEBUG, TRACE] respectively." + "Default: 4 (INFO). 
Note that DEBUG or more detailed logging level " + "requires that the library is compiled with -DRAFT_ACTIVE_LEVER=" + " where >= ", + default=4, + ) if len(sys.argv) == 1: parser.print_help() @@ -511,6 +524,7 @@ def add_algo_group(group_list): batch_size, args.search_threads, mode, + args.raft_log_level, ) From 36cef0d878017c92d8916d347b847720005561a9 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 15 Nov 2023 21:35:33 -0500 Subject: [PATCH 55/57] Fixing log level docs and option --- .../src/raft-ann-bench/run/__main__.py | 32 +++++++++++++------ 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py index 04abefdf7b..c01478902c 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py @@ -25,6 +25,21 @@ import yaml +log_levels = { + "off": 0, + "error": 1, + "warn": 2, + "info": 3, + "debug": 4, + "trace": 5, +} + + +def parse_log_level(level_str): + if level_str not in log_levels: + raise ValueError("Invalid log level: %s" % level_str) + return log_levels[level_str.lower()] + def positive_int(input_str: str) -> int: try: @@ -88,7 +103,7 @@ def run_build_and_search( batch_size, search_threads, mode="throughput", - raft_log_level=4, + raft_log_level="info", ): for executable, ann_executable_path, algo in executables_to_run.keys(): # Need to write temporary configuration @@ -118,7 +133,7 @@ def run_build_and_search( "--benchmark_counters_tabular=true", "--benchmark_out=" + f"{os.path.join(build_folder, f'{algo}.json')}", - "--raft_log_level=" + raft_log_level, + "--raft_log_level=" + parse_log_level(raft_log_level), ] if force: cmd = cmd + ["--overwrite"] @@ -299,13 +314,12 @@ def main(): ) parser.add_argument( "--raft-log-level", - type=int, - help="Log level, possible values are [0,1,2,3,4,5,6]. 
These levels " - "correspond to [OFF, ERROR, WARN, INFO, DEBUG, TRACE] respectively." - "Default: 4 (INFO). Note that DEBUG or more detailed logging level " - "requires that the library is compiled with -DRAFT_ACTIVE_LEVER=" - " where >= ", - default=4, + help="Log level, possible values are " + "[off, error, warn, info, debug, trace]. " + "Default: 'info'. Note that 'debug' or more detailed " + "logging level requires that the library is compiled with " + "-DRAFT_ACTIVE_LEVEL= where >= ", + default="info", ) if len(sys.argv) == 1: From 918cd353330d420a7fe2789667c342636e75d651 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 15 Nov 2023 22:21:14 -0500 Subject: [PATCH 56/57] More robust index file --- .../src/raft-ann-bench/run/__main__.py | 59 ++++++++++++++++++- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py index c01478902c..177c7bf649 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py @@ -54,6 +54,53 @@ def positive_int(input_str: str) -> int: return i +def merge_build_files(build_dir, build_file, temp_build_file): + + build_dict = {} + + # If build file exists, read it + build_json_path = os.path.join(build_dir, build_file) + tmp_build_json_path = os.path.join(build_dir, temp_build_file) + if os.path.isfile(build_json_path): + try: + with open(build_json_path, "r") as f: + build_dict = json.load(f) + except Exception as e: + print( + "Error loading existing build file: %s (%s)" + % (build_json_path, e) + ) + + temp_build_dict = {} + if os.path.isfile(tmp_build_json_path): + with open(tmp_build_json_path, "r") as f: + temp_build_dict = json.load(f) + else: + raise ValueError("Temp build file not found: %s" % tmp_build_json_path) + + tmp_benchmarks = ( + temp_build_dict["benchmarks"] + if "benchmarks" in temp_build_dict + else {} + ) + 
benchmarks = build_dict["benchmarks"] if "benchmarks" in build_dict else {} + + # If the build time is absolute 0 then an error occurred + final_bench_dict = {} + for b in benchmarks: + if b["real_time"] > 0: + final_bench_dict[b["name"]] = b + + for tmp_bench in tmp_benchmarks: + if tmp_bench["real_time"] > 0: + final_bench_dict[tmp_bench["name"]] = tmp_bench + + temp_build_dict["benchmarks"] = [v for k, v in final_bench_dict.items()] + with open(build_json_path, "w") as f: + json_str = json.dumps(temp_build_dict, indent=2) + f.write(json_str) + + def validate_algorithm(algos_conf, algo, gpu_present): algos_conf_keys = set(algos_conf.keys()) if gpu_present: @@ -125,6 +172,8 @@ def run_build_and_search( if build: build_folder = os.path.join(legacy_result_folder, "build") os.makedirs(build_folder, exist_ok=True) + build_file = f"{algo}.json" + temp_build_file = f"{build_file}.lock" cmd = [ ann_executable_path, "--build", @@ -132,8 +181,8 @@ def run_build_and_search( "--benchmark_out_format=json", "--benchmark_counters_tabular=true", "--benchmark_out=" - + f"{os.path.join(build_folder, f'{algo}.json')}", - "--raft_log_level=" + parse_log_level(raft_log_level), + + f"{os.path.join(build_folder, temp_build_file)}", + "--raft_log_level=" + f"{parse_log_level(raft_log_level)}", ] if force: cmd = cmd + ["--overwrite"] @@ -146,9 +195,13 @@ def run_build_and_search( else: try: subprocess.run(cmd, check=True) + merge_build_files( + build_folder, build_file, temp_build_file + ) except Exception as e: print("Error occurred running benchmark: %s" % e) finally: + os.remove(os.path.join(build_folder, temp_build_file)) if not search: os.remove(temp_conf_filename) @@ -167,7 +220,7 @@ def run_build_and_search( "--mode=%s" % mode, "--benchmark_out=" + f"{os.path.join(search_folder, f'{algo}.json')}", - "--raft_log_level=" + raft_log_level, + "--raft_log_level=" + f"{parse_log_level(raft_log_level)}", ] if force: cmd = cmd + ["--overwrite"] From 
61339e051564de97b4d29e480bf6cf742d775d59 Mon Sep 17 00:00:00 2001 From: "Corey J. Nolet" Date: Wed, 15 Nov 2023 22:25:59 -0500 Subject: [PATCH 57/57] Use "force" instead of "overwrite" --- cpp/bench/ann/src/common/benchmark.hpp | 8 ++++---- python/raft-ann-bench/src/raft-ann-bench/run/__main__.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/bench/ann/src/common/benchmark.hpp b/cpp/bench/ann/src/common/benchmark.hpp index 1926528232..a2e77323c1 100644 --- a/cpp/bench/ann/src/common/benchmark.hpp +++ b/cpp/bench/ann/src/common/benchmark.hpp @@ -132,7 +132,7 @@ void bench_build(::benchmark::State& state, log_info("Overwriting file: %s", index.file.c_str()); } else { return state.SkipWithMessage( - "Index file already exists (use --overwrite to overwrite the index)."); + "Index file already exists (use --force to overwrite the index)."); } } @@ -381,7 +381,7 @@ inline void printf_usage() ::benchmark::PrintDefaultHelp(); fprintf(stdout, " [--build|--search] \n" - " [--overwrite]\n" + " [--force]\n" " [--data_prefix=]\n" " [--index_prefix=]\n" " [--override_kv=]\n" @@ -393,7 +393,7 @@ inline void printf_usage() " --build: build mode, will build index\n" " --search: search mode, will search using the built index\n" " one and only one of --build and --search should be specified\n" - " --overwrite: force overwriting existing index files\n" + " --force: force overwriting existing index files\n" " --data_prefix=:" " prepend to dataset file paths specified in the .json (default = " "'data/').\n" @@ -592,7 +592,7 @@ inline auto run_main(int argc, char** argv) -> int std::ifstream conf_stream(conf_path); for (int i = 1; i < argc; i++) { - if (parse_bool_flag(argv[i], "--overwrite", force_overwrite) || + if (parse_bool_flag(argv[i], "--force", force_overwrite) || parse_bool_flag(argv[i], "--build", build_mode) || parse_bool_flag(argv[i], "--search", search_mode) || parse_string_flag(argv[i], "--data_prefix", data_prefix) || diff --git 
a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py index 177c7bf649..4611f39264 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py +++ b/python/raft-ann-bench/src/raft-ann-bench/run/__main__.py @@ -185,7 +185,7 @@ def run_build_and_search( "--raft_log_level=" + f"{parse_log_level(raft_log_level)}", ] if force: - cmd = cmd + ["--overwrite"] + cmd = cmd + ["--force"] cmd = cmd + [temp_conf_filename] if dry_run: @@ -223,7 +223,7 @@ def run_build_and_search( "--raft_log_level=" + f"{parse_log_level(raft_log_level)}", ] if force: - cmd = cmd + ["--overwrite"] + cmd = cmd + ["--force"] if search_threads: cmd = cmd + ["--threads=%s" % search_threads]