remove hardcoded pool size
tarang-jain committed Nov 29, 2023
1 parent 09bcbd8 commit ab442b3
Showing 3 changed files with 54 additions and 14 deletions.
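What the commit does: previously, when use_raft was set, the wrapper built a pool memory resource with hardcoded bounds (pow(2, 30) initial and pow(2, 31) maximum bytes, i.e. 1 GiB and 2 GiB) on top of a stack-local CUDA resource. Now it wraps whatever rmm::mr::cuda_memory_resource is currently installed for the device, lets the pool size itself on demand, and records the previous resource so the destructor can restore it. Below is a minimal sketch of that pattern, assuming RMM's 2023-era rmm::mr API; PoolGuard is a hypothetical name, not code from this commit. Unlike the diff (where the inner "result" shadows the outer one and is destroyed at the end of its if block), the sketch keeps the pool alive for the guard's lifetime:

#include <rmm/cuda_device.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

#include <memory>

// Hypothetical RAII guard mirroring the commit's constructor/destructor logic.
struct PoolGuard {
  explicit PoolGuard(int device) : id_{device}
  {
    // Store the current memory resource so it can be restored on teardown.
    previous_ = rmm::mr::get_per_device_resource(id_);

    // Only wrap the plain CUDA resource; leave any user-installed resource alone.
    auto* upstream = dynamic_cast<rmm::mr::cuda_memory_resource*>(previous_);
    if (upstream != nullptr) {
      // No explicit initial/maximum size: the pool grows on demand instead of
      // the removed hardcoded 1 GiB / 2 GiB bounds.
      pool_ = std::make_shared<
        rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource>>(upstream);
      rmm::mr::set_per_device_resource(id_, pool_.get());
    }
  }

  // Restore the old memory resource, as the FaissGpu destructor now does.
  ~PoolGuard() { rmm::mr::set_per_device_resource(id_, previous_); }

  rmm::cuda_device_id id_;
  rmm::mr::device_memory_resource* previous_{nullptr};
  std::shared_ptr<rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource>> pool_{};
};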
27 changes: 20 additions & 7 deletions cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h
@@ -109,11 +109,14 @@ class FaissGpu : public ANN<T> {
    RAFT_CUDA_TRY(cudaEventCreate(&sync_, cudaEventDisableTiming));
    faiss_default_stream_ = gpu_resource_.getDefaultStream(device_);
    raft::resource::set_cuda_stream(handle_, faiss_default_stream_);
    RAFT_LOG_INFO("device %d", device_);
    // store the current memory resource in case it is modified by the algorithm
    current_mr_ = rmm::mr::get_per_device_resource(rmm::cuda_device_id{device_});
  }

  virtual ~FaissGpu() noexcept
  {
    // restore the old memory resource
    rmm::mr::set_per_device_resource(rmm::cuda_device_id{device_}, current_mr_);
    RAFT_CUDA_TRY_NO_THROW(cudaEventDestroy(sync_));
  }
@@ -198,6 +201,8 @@ void FaissGpu<T>::build(const T* dataset, size_t nrow, cudaStream_t stream)
    index_ivf->cp.min_points_per_centroid = min_ppc;
  }
  index_->train(nrow, dataset);  // faiss::gpu::GpuIndexFlat::train() will do nothing
+  cudaDeviceSynchronize();
+  RAFT_LOG_INFO("faiss index trained");
  assert(index_->is_trained);
  index_->add(nrow, dataset);
  stream_wait(stream);
@@ -323,17 +328,25 @@ class FaissGpuIVFPQ : public FaissGpu<T> {
    config.device = this->device_;

    if (config.use_raft) {
-      rmm::mr::cuda_memory_resource cuda_mr;
-      // Construct a resource that uses a coalescing best-fit pool allocator
-      rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_mr{
-        &cuda_mr, pow(2, 30), pow(2, 31)};
-      rmm::mr::set_per_device_resource(rmm::cuda_device_id{this->device_}, &pool_mr);
+      auto result =
+        std::shared_ptr<rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource>>{nullptr};
+
+      auto* upstream = dynamic_cast<rmm::mr::cuda_memory_resource*>(
+        rmm::mr::get_per_device_resource(rmm::cuda_device_id(this->device_)));
+      if (upstream != nullptr) {
+        auto result =
+          std::make_shared<rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource>>(upstream);
+        rmm::mr::set_per_device_resource(rmm::cuda_device_id(this->device_), result.get());
+      }
    }
+    cudaDeviceSynchronize();
+    RAFT_LOG_INFO("set to pool resource");

-    this->index_ = std::make_unique<faiss::gpu::GpuIndexIVFPQ>(&(this->gpu_resource_),
+    int subQuantizers = dim / param.M;
+    this->index_ = std::make_unique<faiss::gpu::GpuIndexIVFPQ>(&(this->gpu_resource_),
                                                               dim,
                                                               param.nlist,
-                                                               param.M,
+                                                               subQuantizers,
                                                               param.bitsPerCode,
                                                               this->metric_type_,
                                                               config);
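Alongside the allocator change, the GpuIndexIVFPQ constructor argument changes meaning: it now receives dim / param.M subquantizers rather than param.M itself, so M in the tuning YAMLs below counts dimensions per PQ subquantizer instead of the subquantizer count. A small worked example; the 128-dimensional dataset is an assumption (typical of million-scale benchmark sets), not something the commit fixes:

#include <cstdio>
#include <initializer_list>

int main()
{
  const int dim = 128;          // assumed dataset dimensionality
  for (int M : {4, 8, 16}) {    // the M sweep from the YAML configs below
    // The wrapper now passes dim / M subquantizers to GpuIndexIVFPQ.
    std::printf("M=%2d -> subQuantizers=%d\n", M, dim / M);
  }
  return 0;
}

At dim = 128 this yields 32, 16, and 8 subquantizers, which lines up with the pq_dim: [32, 16, 8] sweep added to the RAFT IVF-PQ config in the third file, so the two libraries are compared at matching PQ dimensions.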
Second changed file (a faiss GPU IVF-PQ benchmark tuning YAML; path not shown in the capture):
@@ -3,7 +3,7 @@ groups:
  base:
    build:
      nlist: [1024, 2048, 4096, 8192]
-      M: [8, 16]
+      M: [4, 8, 16]
      ratio: [10, 25]
      usePrecomputed: [False]
      useFloat16: [False]
@@ -12,16 +12,31 @@ groups:
    search:
      nprobe: [1, 5, 10, 50, 100, 200]
      refine_ratio: [1]
-  raft_enabled:
+  raft_disabled_million_scale:
    build:
-      nlist: [1024, 2048, 4096, 8192]
-      M: [8, 16]
-      ratio: [10, 25]
+      nlist: [4096]
+      M: [4, 8, 16]
+      ratio: [1]
+      usePrecomputed: [False, True]
+      useFloat16: [False, True]
+      bitsPerCode: [8]
+      interleavedLayout: [False]
+      use_raft: [False]
+    search:
+      nprobe: [1, 5, 10, 50, 100, 200]
+      k: [10, 100]
+      refine_ratio: [1]
+  raft_enabled_million_scale:
+    build:
+      nlist: [4096]
+      M: [4, 8, 16]
+      ratio: [1]
      usePrecomputed: [False]
-      useFloat16: [True, False]
+      useFloat16: [False, True]
      bitsPerCode: [8]
      interleavedLayout: [True]
      use_raft: [True]
    search:
      nprobe: [1, 5, 10, 50, 100, 200]
+      k: [10, 100]
      refine_ratio: [1]
Third changed file (a RAFT IVF-PQ benchmark tuning YAML; path not shown in the capture):
@@ -14,4 +14,16 @@ groups:
      nprobe: [1, 5, 10, 50, 100, 200]
      internalDistanceDtype: ["float"]
      smemLutDtype: ["float", "fp8", "half"]
-      refine_ratio: [1, 2, 4]
\ No newline at end of file
+      refine_ratio: [1, 2, 4]
+  cmp_faiss_million_scale:
+    build:
+      nlist: [4096]
+      pq_dim: [32, 16, 8]
+      pq_bits: [8]
+      ratio: [1]
+      niter: [25]
+    search:
+      nprobe: [1, 5, 10, 50, 100, 200]
+      k: [10, 100]
+      smemLutDtype: ["float", "half"]
+      refine_ratio: [1]
