Skip to content

Commit

Permalink
add faiss cagra benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
divyegala committed Mar 6, 2024
1 parent e1d018a commit 16690ff
Show file tree
Hide file tree
Showing 18 changed files with 502 additions and 37 deletions.
11 changes: 11 additions & 0 deletions cpp/bench/ann/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
option(RAFT_ANN_BENCH_USE_FAISS_GPU_FLAT "Include faiss' brute-force knn algorithm in benchmark" ON)
option(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT "Include faiss' ivf flat algorithm in benchmark" ON)
option(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_PQ "Include faiss' ivf pq algorithm in benchmark" ON)
option(RAFT_ANN_BENCH_USE_FAISS_GPU_CAGRA "Include faiss' cagra algorithm wrapper in benchmark" ON)
option(RAFT_ANN_BENCH_USE_FAISS_CPU_FLAT "Include faiss' cpu brute-force algorithm in benchmark" ON)

option(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT "Include faiss' cpu ivf flat algorithm in benchmark"
Expand Down Expand Up @@ -50,6 +51,7 @@ if(BUILD_CPU_ONLY)
set(RAFT_ANN_BENCH_USE_FAISS_GPU_FLAT OFF)
set(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT OFF)
set(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_PQ OFF)
set(RAFT_ANN_BENCH_USE_FAISS_GPU_CAGRA OFF)
set(RAFT_ANN_BENCH_USE_RAFT_IVF_FLAT OFF)
set(RAFT_ANN_BENCH_USE_RAFT_IVF_PQ OFF)
set(RAFT_ANN_BENCH_USE_RAFT_CAGRA OFF)
Expand All @@ -64,6 +66,7 @@ else()
set(RAFT_ANN_BENCH_USE_FAISS_GPU_FLAT OFF)
set(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT OFF)
set(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_PQ OFF)
set(RAFT_ANN_BENCH_USE_FAISS_GPU_CAGRA OFF)
set(RAFT_ANN_BENCH_USE_FAISS_CPU_FLAT OFF)
set(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_PQ OFF)
set(RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT OFF)
Expand All @@ -76,6 +79,7 @@ set(RAFT_ANN_BENCH_USE_FAISS OFF)
if(RAFT_ANN_BENCH_USE_FAISS_GPU_FLAT
OR RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_PQ
OR RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_FLAT
OR RAFT_ANN_BENCH_USE_FAISS_GPU_CAGRA
OR RAFT_ANN_BENCH_USE_FAISS_CPU_FLAT
OR RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_PQ
OR RAFT_ANN_BENCH_USE_FAISS_CPU_IVF_FLAT
Expand Down Expand Up @@ -321,6 +325,13 @@ if(RAFT_ANN_BENCH_USE_FAISS_GPU_IVF_PQ)
)
endif()

# When RAFT_ANN_BENCH_USE_FAISS_GPU_CAGRA is enabled, configure the FAISS_GPU_CAGRA benchmark.
# It is built from the same faiss_gpu_benchmark.cu source that the FAISS_GPU_FLAT benchmark
# uses and links against ${RAFT_FAISS_TARGETS}.
# NOTE(review): ConfigureAnnBench is defined elsewhere; presumably it creates the benchmark
# target -- confirm against its definition.
if(RAFT_ANN_BENCH_USE_FAISS_GPU_CAGRA)
ConfigureAnnBench(
NAME FAISS_GPU_CAGRA PATH bench/ann/src/faiss/faiss_gpu_benchmark.cu LINKS
${RAFT_FAISS_TARGETS}
)
endif()

if(RAFT_ANN_BENCH_USE_FAISS_GPU_FLAT)
ConfigureAnnBench(
NAME FAISS_GPU_FLAT PATH bench/ann/src/faiss/faiss_gpu_benchmark.cu LINKS ${RAFT_FAISS_TARGETS}
Expand Down
9 changes: 9 additions & 0 deletions cpp/bench/ann/src/common/benchmark.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -336,10 +336,19 @@ void bench_search(::benchmark::State& state,

// Each thread calculates recall on their partition of queries.
// evaluate recall
// std::cout << "max k: " << dataset->max_k() << std::endl;
if (dataset->max_k() >= k) {
const std::int32_t* gt = dataset->gt_set();
const std::uint32_t max_k = dataset->max_k();
buf<std::size_t> neighbors_host = neighbors->move(MemoryType::Host);

// std::cout << "first 5 n: ";
for(int ll = 0; ll < 5; ++ll) std::cout << neighbors_host.data[ll] << " ";
std::cout << std::endl;
// std::cout << "first 5 gt n: ";
for(int ll = 0; ll < 5; ++ll) std::cout << gt[ll] << " ";
std::cout << std::endl;

std::size_t rows = std::min(queries_processed, query_set_size);
std::size_t match_count = 0;
std::size_t total_count = rows * static_cast<size_t>(k);
Expand Down
22 changes: 22 additions & 0 deletions cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,14 @@ void parse_build_param(const nlohmann::json& conf,
param.quantizer_type = conf.at("quantizer_type");
}

/**
 * Populate the FAISS CPU HNSW build parameters from a JSON configuration.
 *
 * Both "efConstruction" and "M" are mandatory: they are fetched with `at()`, so a
 * missing key is reported through the exception raised by nlohmann::json.
 *
 * @param conf  JSON object holding the build settings.
 * @param param Build parameter struct to fill in.
 */
template <typename T>
void parse_build_param(const nlohmann::json& conf,
                       typename raft::bench::ann::FaissCpuHNSW<T>::BuildParam& param)
{
  const auto& ef_construction_node = conf.at("efConstruction");
  param.ef_construction            = ef_construction_node;

  const auto& m_node = conf.at("M");
  param.M            = m_node;
}

template <typename T>
void parse_search_param(const nlohmann::json& conf,
typename raft::bench::ann::FaissCpu<T>::SearchParam& param)
Expand All @@ -79,6 +87,14 @@ void parse_search_param(const nlohmann::json& conf,
if (conf.contains("numThreads")) { param.num_threads = conf.at("numThreads"); }
}

/**
 * Populate the FAISS CPU HNSW search parameters from a JSON configuration.
 *
 * "ef" is mandatory (fetched with `at()`); "numThreads" is optional and only
 * overwrites `param.num_threads` when the key is present.
 *
 * @param conf  JSON object holding the search settings.
 * @param param Search parameter struct to fill in.
 */
template <typename T>
void parse_search_param(const nlohmann::json& conf,
                        typename raft::bench::ann::FaissCpuHNSW<T>::SearchParam& param)
{
  param.ef = conf.at("ef");

  // Single lookup for the optional key instead of contains() followed by at().
  const auto threads_it = conf.find("numThreads");
  if (threads_it != conf.end()) { param.num_threads = *threads_it; }
}

template <typename T, template <typename> class Algo>
std::unique_ptr<raft::bench::ann::ANN<T>> make_algo(raft::bench::ann::Metric metric,
int dim,
Expand Down Expand Up @@ -124,6 +140,8 @@ std::unique_ptr<raft::bench::ann::ANN<T>> create_algo(const std::string& algo,
ann = make_algo<T, raft::bench::ann::FaissCpuIVFSQ>(metric, dim, conf);
} else if (algo == "faiss_cpu_flat") {
ann = std::make_unique<raft::bench::ann::FaissCpuFlat<T>>(metric, dim);
} else if (algo == "faiss_cpu_hnsw") {
ann = make_algo<T, raft::bench::ann::FaissCpuHNSW>(metric, dim, conf);
}
}

Expand All @@ -145,6 +163,10 @@ std::unique_ptr<typename raft::bench::ann::ANN<T>::AnnSearchParam> create_search
} else if (algo == "faiss_cpu_flat") {
auto param = std::make_unique<typename raft::bench::ann::FaissCpu<T>::SearchParam>();
return param;
} else if (algo == "faiss_cpu_hnsw") {
auto param = std::make_unique<typename raft::bench::ann::FaissCpuHNSW<T>::SearchParam>();
parse_search_param<T>(conf, *param);
return param;
}
// else
throw std::runtime_error("invalid algo: '" + algo + "'");
Expand Down
107 changes: 81 additions & 26 deletions cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@
#pragma once

#include "../common/ann_types.hpp"
#include "../common/thread_pool.hpp"
#include "faiss/impl/HNSW.h"

#include <raft/core/logger.hpp>

#include <faiss/IndexFlat.h>
#include <faiss/IndexHNSW.h>
#include <faiss/IndexIVFFlat.h>
#include <faiss/IndexIVFPQ.h>
#include <faiss/IndexRefine.h>
Expand Down Expand Up @@ -73,7 +74,7 @@ class FaissCpu : public ANN<T> {
static_assert(std::is_same_v<T, float>, "faiss support only float type");
}

void build(const T* dataset, size_t nrow, cudaStream_t stream = 0) final;
virtual void build(const T* dataset, size_t nrow, cudaStream_t stream = 0);

void set_search_param(const AnnSearchParam& param) override;

Expand All @@ -88,12 +89,12 @@ class FaissCpu : public ANN<T> {

// TODO: if the number of results is less than k, the remaining elements of 'neighbors'
// will be filled with (size_t)-1
void search(const T* queries,
virtual void search(const T* queries,
int batch_size,
int k,
size_t* neighbors,
float* distances,
cudaStream_t stream = 0) const final;
cudaStream_t stream = 0) const;

AlgoProperty get_preference() const override
{
Expand All @@ -117,9 +118,6 @@ class FaissCpu : public ANN<T> {
faiss::MetricType metric_type_;
int nlist_;
double training_sample_fraction_;

int num_threads_;
std::shared_ptr<FixedThreadPool> thread_pool_;
};

template <typename T>
Expand Down Expand Up @@ -150,12 +148,14 @@ void FaissCpu<T>::build(const T* dataset, size_t nrow, cudaStream_t stream)
index_->train(nrow, dataset); // faiss::IndexFlat::train() will do nothing
assert(index_->is_trained);
index_->add(nrow, dataset);
std::cout << "finished adding" << std::endl;
index_refine_ = std::make_shared<faiss::IndexRefineFlat>(this->index_.get(), dataset);
}

template <typename T>
void FaissCpu<T>::set_search_param(const AnnSearchParam& param)
{
std::cout << "should not be here" << std::endl;
auto search_param = dynamic_cast<const SearchParam&>(param);
int nprobe = search_param.nprobe;
assert(nprobe <= nlist_);
Expand All @@ -165,9 +165,13 @@ void FaissCpu<T>::set_search_param(const AnnSearchParam& param)
this->index_refine_.get()->k_factor = search_param.refine_ratio;
}

if (!thread_pool_ || num_threads_ != search_param.num_threads) {
num_threads_ = search_param.num_threads;
thread_pool_ = std::make_shared<FixedThreadPool>(num_threads_);
if (param.metric_objective == Objective::LATENCY) {
// Let FAISS use its internal threading model with user defined `numThreads`
omp_set_num_threads(search_param.num_threads);
}
else if (param.metric_objective == Objective::THROUGHPUT) {
// FAISS is not allowed to internally parallelize
omp_set_num_threads(1);
}
}

Expand All @@ -182,12 +186,7 @@ void FaissCpu<T>::search(const T* queries,
static_assert(sizeof(size_t) == sizeof(faiss::idx_t),
"sizes of size_t and faiss::idx_t are different");

thread_pool_->submit(
[&](int i) {
// Use thread pool for batch size = 1. FAISS multi-threads internally for batch size > 1.
index_->search(batch_size, queries, k, distances, reinterpret_cast<faiss::idx_t*>(neighbors));
},
1);
index_->search(batch_size, queries, k, distances, reinterpret_cast<faiss::idx_t*>(neighbors));
}

template <typename T>
Expand Down Expand Up @@ -306,16 +305,6 @@ class FaissCpuFlat : public FaissCpu<T> {
this->index_ = std::make_shared<faiss::IndexFlat>(dim, this->metric_type_);
}

// class FaissCpu is more like an IVF class, so it needs special treatment here
void set_search_param(const typename ANN<T>::AnnSearchParam& param) override
{
auto search_param = dynamic_cast<const typename FaissCpu<T>::SearchParam&>(param);
if (!this->thread_pool_ || this->num_threads_ != search_param.num_threads) {
this->num_threads_ = search_param.num_threads;
this->thread_pool_ = std::make_shared<FixedThreadPool>(this->num_threads_);
}
};

void save(const std::string& file) const override
{
this->template save_<faiss::IndexFlat>(file);
Expand All @@ -328,4 +317,70 @@ class FaissCpuFlat : public FaissCpu<T> {
}
};

/**
 * Benchmark wrapper around `faiss::IndexHNSWFlat`.
 *
 * Reuses the save/load helpers and the `index_` / `metric_type_` members of `FaissCpu<T>`,
 * but overrides build, search and search-parameter handling because HNSW takes its own
 * parameters (`M`, `efConstruction`, `efSearch`).
 */
template <typename T>
class FaissCpuHNSW : public FaissCpu<T> {
 public:
  /** Index construction parameters. */
  struct BuildParam {
    int M;                // passed as the `M` argument of faiss::IndexHNSWFlat
    int ef_construction;  // written to hnsw.efConstruction
  };

  using typename ANN<T>::AnnSearchParam;
  /** Per-search parameters. */
  struct SearchParam : public AnnSearchParam {
    int ef;  // written to SearchParametersHNSW::efSearch
    // "numThreads" is optional in the JSON config, so give it a usable default instead of
    // handing an unset value to omp_set_num_threads().
    int num_threads = omp_get_num_procs();
  };

  /**
   * @param metric Distance metric, forwarded to the FaissCpu base.
   * @param dim    Dimensionality of the indexed vectors.
   * @param param  HNSW build parameters.
   */
  FaissCpuHNSW(Metric metric, int dim, const BuildParam& param)
    : FaissCpu<T>(metric, dim, typename FaissCpu<T>::BuildParam())
  {
    // faiss::IndexHNSWFlat takes (d, M, metric): the dimension comes first, then M.
    // Keep the concrete type locally so efConstruction can be set without a downcast.
    auto hnsw_index = std::make_shared<faiss::IndexHNSWFlat>(dim, param.M, this->metric_type_);
    hnsw_index->hnsw.efConstruction = param.ef_construction;
    this->index_                    = std::move(hnsw_index);
  }

  /** Serialize the index to `file` through the base-class helper. */
  void save(const std::string& file) const override
  {
    this->template save_<faiss::IndexHNSWFlat>(file);
  }

  /** Load the index from `file` through the base-class helper. */
  void load(const std::string& file) override { this->template load_<faiss::IndexHNSWFlat>(file); }

  /** Return a copy of this wrapper made with the copy constructor. */
  std::unique_ptr<ANN<T>> copy()
  {
    return std::make_unique<FaissCpuHNSW<T>>(*this);  // use copy constructor
  }

  /**
   * Apply search-time settings.
   *
   * @param param Must actually be a `FaissCpuHNSW<T>::SearchParam`; otherwise the
   *              reference `dynamic_cast` throws `std::bad_cast`.
   */
  void set_search_param(const AnnSearchParam& param) override
  {
    // Bind by reference: no need to copy the parameter object.
    const auto& search_param = dynamic_cast<const SearchParam&>(param);
    if (search_param.metric_objective == Objective::LATENCY) {
      // Let FAISS use its internal threading model with user defined `numThreads`
      omp_set_num_threads(search_param.num_threads);
    } else if (search_param.metric_objective == Objective::THROUGHPUT) {
      // FAISS is not allowed to internally parallelize
      omp_set_num_threads(1);
    }
    search_params_.efSearch = search_param.ef;
  }

  /**
   * Build the index from `nrow` vectors stored contiguously in `dataset`.
   * `stream` is unused.
   */
  void build(const T* dataset, size_t nrow, cudaStream_t stream) override
  {
    this->index_->train(nrow, dataset);  // faiss::IndexHNSWFlat::train() will do nothing
    // `index_` is a member of a dependent base class, so it must be named through `this->`.
    assert(this->index_->is_trained);
    this->index_->add(nrow, dataset);
    std::cout << "finished adding" << std::endl;
  }

  /**
   * Search `batch_size` queries for their `k` nearest neighbors using the `efSearch`
   * value most recently set by set_search_param(). `stream` is unused.
   */
  void search(const T* queries,
              int batch_size,
              int k,
              size_t* neighbors,
              float* distances,
              cudaStream_t stream = 0) const override
  {
    // The reinterpret_cast below is only valid when both index types have the same size.
    static_assert(sizeof(size_t) == sizeof(faiss::idx_t),
                  "sizes of size_t and faiss::idx_t are different");

    this->index_->search(batch_size,
                         queries,
                         k,
                         distances,
                         reinterpret_cast<faiss::idx_t*>(neighbors),
                         &search_params_);
  }

 private:
  faiss::SearchParametersHNSW search_params_;
};
} // namespace raft::bench::ann
Loading

0 comments on commit 16690ff

Please sign in to comment.