Skip to content

Commit

Permalink
Fixes to new YAML config raft-bench-ann (#1945)
Browse files Browse the repository at this point in the history
Authors:
  - Divye Gala (https://github.com/divyegala)
  - Corey J. Nolet (https://github.com/cjnolet)

Approvers:
  - Corey J. Nolet (https://github.com/cjnolet)

URL: #1945
  • Loading branch information
divyegala authored Nov 1, 2023
1 parent d504795 commit 67a796c
Show file tree
Hide file tree
Showing 19 changed files with 163 additions and 106 deletions.
2 changes: 1 addition & 1 deletion cpp/bench/ann/src/common/ann_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ class ANN : public AnnBase {
// The advantage of this way is that index has smaller size
// and many indices can share one dataset.
//
// AlgoProperty::need_dataset_when_search of such algorithm should be true,
// SearchParam::needs_dataset() of such algorithm should be true,
// and set_search_dataset() should save the passed-in pointer somewhere.
// The client code should call set_search_dataset() before searching,
// and should not release dataset before searching is finished.
Expand Down
12 changes: 11 additions & 1 deletion cpp/bench/ann/src/common/benchmark.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,11 +215,13 @@ void bench_search(::benchmark::State& state,
search_param->metric_objective = metric_objective;
} catch (const std::exception& e) {
state.SkipWithError("Failed to create an algo: " + std::string(e.what()));
return;
}
algo->set_search_param(*search_param);

auto algo_property = parse_algo_property(algo->get_preference(), sp_json);
current_algo_props = std::make_shared<AlgoProperty>(algo_property.dataset_memory_type,
algo_property.query_memory_type);

if (search_param->needs_dataset()) {
try {
algo->set_search_dataset(dataset->base_set(current_algo_props->dataset_memory_type),
Expand All @@ -231,6 +233,14 @@ void bench_search(::benchmark::State& state,
return;
}
}

try {
algo->set_search_param(*search_param);

} catch (const std::exception& ex) {
state.SkipWithError("An error occurred setting search parameters: " + std::string(ex.what()));
return;
}
}

const auto algo_property = *current_algo_props;
Expand Down
2 changes: 1 addition & 1 deletion cpp/bench/ann/src/faiss/faiss_cpu_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ void parse_build_param(const nlohmann::json& conf,
typename raft::bench::ann::FaissCpuIVFPQ<T>::BuildParam& param)
{
parse_base_build_param<T>(conf, param);
param.M_ratio = conf.at("M_ratio");
param.M = conf.at("M");
if (conf.contains("usePrecomputed")) {
param.usePrecomputed = conf.at("usePrecomputed");
} else {
Expand Down
12 changes: 4 additions & 8 deletions cpp/bench/ann/src/faiss/faiss_cpu_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ void FaissCpu<T>::build(const T* dataset, size_t nrow, cudaStream_t stream)
index_->train(nrow, dataset); // faiss::IndexFlat::train() will do nothing
assert(index_->is_trained);
index_->add(nrow, dataset);
index_refine_ = std::make_unique<faiss::IndexRefineFlat>(this->index_.get(), dataset);
}

template <typename T>
Expand All @@ -163,7 +164,6 @@ void FaissCpu<T>::set_search_param(const AnnSearchParam& param)
dynamic_cast<faiss::IndexIVF*>(index_.get())->nprobe = nprobe;

if (search_param.refine_ratio > 1.0) {
this->index_refine_ = std::make_unique<faiss::IndexRefineFlat>(this->index_.get());
this->index_refine_.get()->k_factor = search_param.refine_ratio;
}

Expand Down Expand Up @@ -229,20 +229,16 @@ template <typename T>
class FaissCpuIVFPQ : public FaissCpu<T> {
public:
struct BuildParam : public FaissCpu<T>::BuildParam {
int M_ratio;
int M;
int bitsPerCode;
bool usePrecomputed;
};

FaissCpuIVFPQ(Metric metric, int dim, const BuildParam& param) : FaissCpu<T>(metric, dim, param)
{
this->init_quantizer(dim);
this->index_ = std::make_unique<faiss::IndexIVFPQ>(this->quantizer_.get(),
dim,
param.nlist,
dim / param.M_ratio,
param.bitsPerCode,
this->metric_type_);
this->index_ = std::make_unique<faiss::IndexIVFPQ>(
this->quantizer_.get(), dim, param.nlist, param.M, param.bitsPerCode, this->metric_type_);
}

void save(const std::string& file) const override
Expand Down
2 changes: 1 addition & 1 deletion cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ void parse_build_param(const nlohmann::json& conf,
typename raft::bench::ann::FaissGpuIVFPQ<T>::BuildParam& param)
{
parse_base_build_param<T>(conf, param);
param.M_ratio = conf.at("M_ratio");
param.M = conf.at("M");
if (conf.contains("usePrecomputed")) {
param.usePrecomputed = conf.at("usePrecomputed");
} else {
Expand Down
62 changes: 47 additions & 15 deletions cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@
#include <faiss/index_io.h>
#include <omp.h>

#include <raft/core/device_resources.hpp>
#include <raft/core/resource/stream_view.hpp>

#include <cassert>
#include <memory>
#include <stdexcept>
Expand Down Expand Up @@ -84,6 +87,7 @@ class FaissGpu : public ANN<T> {
// Search-time parameters shared by the FAISS GPU index wrappers.
struct SearchParam : public AnnSearchParam {
// Number of clusters to probe for each query vector.
int nprobe;
// Refinement factor; the default of 1.0 means no refinement is requested.
float refine_ratio = 1.0;
// The search dataset is only needed when refinement is requested (refine_ratio > 1).
auto needs_dataset() const -> bool override { return refine_ratio > 1.0f; }
};

struct BuildParam {
Expand All @@ -101,6 +105,7 @@ class FaissGpu : public ANN<T> {
RAFT_CUDA_TRY(cudaGetDevice(&device_));
RAFT_CUDA_TRY(cudaEventCreate(&sync_, cudaEventDisableTiming));
faiss_default_stream_ = gpu_resource_.getDefaultStream(device_);
raft::resource::set_cuda_stream(handle_, faiss_default_stream_);
}

// Destroys the CUDA event held in sync_ (created in the constructor via cudaEventCreate).
// The destructor is noexcept, so the *_NO_THROW variant of the CUDA check macro is used here.
virtual ~FaissGpu() noexcept { RAFT_CUDA_TRY_NO_THROW(cudaEventDestroy(sync_)); }
Expand All @@ -109,6 +114,8 @@ class FaissGpu : public ANN<T> {

// Base-class implementation: intentionally a no-op (empty body); `param` is ignored here.
// NOTE(review): index-specific subclasses presumably override this to apply nprobe /
// refine_ratio -- confirm against the derived classes.
virtual void set_search_param(const FaissGpu<T>::AnnSearchParam& param) {}

// Remembers the caller's dataset pointer in dataset_. Only the pointer is stored (no copy is
// made) and `nrow` is not used.
void set_search_dataset(const T* dataset, size_t nrow) override { dataset_ = dataset; }

// TODO: if the number of results is less than k, the remaining elements of 'neighbors'
// will be filled with (size_t)-1
void search(const T* queries,
Expand All @@ -123,7 +130,7 @@ class FaissGpu : public ANN<T> {
AlgoProperty property;
// to enable building big dataset which is larger than GPU memory
property.dataset_memory_type = MemoryType::Host;
property.query_memory_type = MemoryType::Device;
property.query_memory_type = MemoryType::Host;
return property;
}

Expand All @@ -142,14 +149,17 @@ class FaissGpu : public ANN<T> {

mutable faiss::gpu::StandardGpuResources gpu_resource_;
std::unique_ptr<faiss::gpu::GpuIndex> index_;
std::unique_ptr<faiss::IndexRefineFlat> index_refine_;
std::unique_ptr<faiss::IndexRefineFlat> index_refine_{nullptr};
faiss::MetricType metric_type_;
int nlist_;
int device_;
cudaEvent_t sync_{nullptr};
cudaStream_t faiss_default_stream_{nullptr};
double training_sample_fraction_;
std::unique_ptr<faiss::SearchParameters> search_params_;
const T* dataset_;
raft::device_resources handle_;
float refine_ratio_ = 1.0;
};

template <typename T>
Expand Down Expand Up @@ -194,7 +204,25 @@ void FaissGpu<T>::search(const T* queries,
{
static_assert(sizeof(size_t) == sizeof(faiss::idx_t),
"sizes of size_t and faiss::idx_t are different");
index_->search(batch_size, queries, k, distances, reinterpret_cast<faiss::idx_t*>(neighbors));

if (this->refine_ratio_ > 1.0) {
// TODO: FAISS changed their search APIs to accept the search parameters as a struct object
// but their refine API doesn't allow the struct to be passed in. Once this is fixed, we
// need to re-enable the refinement call below:
//   index_refine_->search(batch_size, queries, k, distances,
//                         reinterpret_cast<faiss::idx_t*>(neighbors),
//                         this->search_params_.get());
// Related FAISS issue: https://github.com/facebookresearch/faiss/issues/3118
throw std::runtime_error(
"FAISS doesn't support refinement in their new APIs so this feature is disabled in the "
"benchmarks for the time being.");
} else {
index_->search(batch_size,
queries,
k,
distances,
reinterpret_cast<faiss::idx_t*>(neighbors),
this->search_params_.get());
}
stream_wait(stream);
}

Expand All @@ -217,7 +245,13 @@ void FaissGpu<T>::load_(const std::string& file)

std::unique_ptr<CpuIndex> cpu_index(dynamic_cast<CpuIndex*>(faiss::read_index(file.c_str())));
assert(cpu_index);
dynamic_cast<GpuIndex*>(index_.get())->copyFrom(cpu_index.get());

try {
dynamic_cast<GpuIndex*>(index_.get())->copyFrom(cpu_index.get());

} catch (const std::exception& e) {
std::cout << "Error loading index file: " << std::string(e.what()) << std::endl;
}
}

template <typename T>
Expand All @@ -242,11 +276,7 @@ class FaissGpuIVFFlat : public FaissGpu<T> {
faiss::IVFSearchParameters faiss_search_params;
faiss_search_params.nprobe = nprobe;
this->search_params_ = std::make_unique<faiss::IVFSearchParameters>(faiss_search_params);

if (search_param.refine_ratio > 1.0) {
this->index_refine_ = std::make_unique<faiss::IndexRefineFlat>(this->index_.get());
this->index_refine_.get()->k_factor = search_param.refine_ratio;
}
this->refine_ratio_ = search_param.refine_ratio;
}

void save(const std::string& file) const override
Expand All @@ -263,7 +293,7 @@ template <typename T>
class FaissGpuIVFPQ : public FaissGpu<T> {
public:
struct BuildParam : public FaissGpu<T>::BuildParam {
int M_ratio;
int M;
bool useFloat16;
bool usePrecomputed;
};
Expand All @@ -279,7 +309,7 @@ class FaissGpuIVFPQ : public FaissGpu<T> {
std::make_unique<faiss::gpu::GpuIndexIVFPQ>(&(this->gpu_resource_),
dim,
param.nlist,
dim / param.M_ratio,
param.M,
8, // FAISS only supports bitsPerCode=8
this->metric_type_,
config);
Expand All @@ -290,14 +320,15 @@ class FaissGpuIVFPQ : public FaissGpu<T> {
auto search_param = dynamic_cast<const typename FaissGpu<T>::SearchParam&>(param);
int nprobe = search_param.nprobe;
assert(nprobe <= nlist_);

this->refine_ratio_ = search_param.refine_ratio;
faiss::IVFPQSearchParameters faiss_search_params;
faiss_search_params.nprobe = nprobe;

this->search_params_ = std::make_unique<faiss::IVFPQSearchParameters>(faiss_search_params);

if (search_param.refine_ratio > 1.0) {
this->index_refine_ = std::make_unique<faiss::IndexRefineFlat>(this->index_.get());
this->index_refine_ =
std::make_unique<faiss::IndexRefineFlat>(this->index_.get(), this->dataset_);
this->index_refine_.get()->k_factor = search_param.refine_ratio;
}
}
Expand Down Expand Up @@ -349,9 +380,10 @@ class FaissGpuIVFSQ : public FaissGpu<T> {
faiss_search_params.nprobe = nprobe;

this->search_params_ = std::make_unique<faiss::IVFSearchParameters>(faiss_search_params);

this->refine_ratio_ = search_param.refine_ratio;
if (search_param.refine_ratio > 1.0) {
this->index_refine_ = std::make_unique<faiss::IndexRefineFlat>(this->index_.get());
this->index_refine_ =
std::make_unique<faiss::IndexRefineFlat>(this->index_.get(), this->dataset_);
this->index_refine_.get()->k_factor = search_param.refine_ratio;
}
}
Expand Down
20 changes: 10 additions & 10 deletions docs/source/ann_benchmarks_param_tuning.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ IVF-pq is an inverted-file index, which partitions the vectors into a series of
| `M_ratio` | `build_param` | Y | Positive Integer Power of 2 [8-64] | | Ratio used to determine the number of chunks or subquantizers for each vector, computed as `dims` / `M_ratio`. |
| `usePrecomputed` | `build_param` | N | Boolean. Default=`false` | `false` | Use pre-computed lookup tables to speed up search at the cost of increased memory usage. |
| `useFloat16` | `build_param` | N | Boolean. Default=`false` | `false` | Use half-precision floats for clustering step. |
| `numProbes` | `search_params` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. |
| `nprobe` | `search_params` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. |
| `refine_ratio` | `search_params` | N| Positive Number >=0 | 0 | `refine_ratio * k` nearest neighbors are queried from the index initially and an additional refinement step improves recall by selecting only the best `k` neighbors. |

### `faiss_cpu_flat`
Expand All @@ -118,16 +118,16 @@ Use FAISS IVF-Flat index on CPU

Use FAISS IVF-PQ index on CPU

| Parameter | Type | Required | Data Type | Default | Description |
|-----------------|----------------|----------|------------------------------------|---------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `nlist` | `build_param` | Y | Positive Integer >0 | | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. |
| `ratio` | `build_param` | N | Positive Integer >0 | 2 | `1/ratio` is the number of training points which should be used to train the clusters. |
| `M` | `build_param` | Y | Positive Integer Power of 2 [8-64] | | Number of chunks or subquantizers for each vector. |
| Parameter | Type | Required | Data Type | Default | Description |
|------------------|----------------|----------|------------------------------------|---------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `nlist` | `build_param` | Y | Positive Integer >0 | | Number of clusters to partition the vectors into. Larger values will put less points into each cluster but this will impact index build time as more clusters need to be trained. |
| `ratio` | `build_param` | N | Positive Integer >0 | 2 | `1/ratio` is the number of training points which should be used to train the clusters. |
| `M` | `build_param` | Y | Positive Integer Power of 2 [8-64] | | Number of chunks or subquantizers for each vector. |
| `usePrecomputed` | `build_param` | N | Boolean. Default=`false` | `false` | Use pre-computed lookup tables to speed up search at the cost of increased memory usage. |
| `bitsPerCode` | `build_param` | N | Positive Integer [4-8] | 8 | Number of bits to use for each code. |
| `numProbes` | `search_params` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. |
| `refine_ratio` | `search_params` | N| Positive Number >=0 | 0 | `refine_ratio * k` nearest neighbors are queried from the index initially and an additional refinement step improves recall by selecting only the best `k` neighbors. |
| `numThreads` | `search_params` | N | Positive Integer >0 | 1 | Number of threads to use for queries. |
| `bitsPerCode` | `build_param` | N | Positive Integer [4-8] | 8 | Number of bits to use for each code. |
| `nprobe` | `search_params` | Y | Positive Integer >0 | | The closest number of clusters to search for each query vector. Larger values will improve recall but will search more points in the index. |
| `refine_ratio` | `search_params` | N| Positive Number >=0 | 0 | `refine_ratio * k` nearest neighbors are queried from the index initially and an additional refinement step improves recall by selecting only the best `k` neighbors. |
| `numThreads` | `search_params` | N | Positive Integer >0 | 1 | Number of threads to use for queries. |


## HNSW
Expand Down
1 change: 1 addition & 0 deletions docs/source/raft_ann_benchmarks.md
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,7 @@ A single configuration will often define a set of algorithms, with associated in
base_file: sift-128-euclidean/base.fbin
query_file: sift-128-euclidean/query.fbin
groundtruth_neighbors_file: sift-128-euclidean/groundtruth.neighbors.ibin
dims: 128
distance: euclidean
```
Expand Down
Loading

0 comments on commit 67a796c

Please sign in to comment.