remove hardcoded pool size
tarang-jain committed Nov 29, 2023
1 parent 09bcbd8 commit ab442b3
Showing 3 changed files with 54 additions and 14 deletions.
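What the commit does: previously, when use_raft was set, the wrapper built a pool memory resource with hardcoded bounds (pow(2, 30) initial and pow(2, 31) maximum bytes, i.e. 1 GiB and 2 GiB) on top of a stack-local CUDA resource. Now it wraps whatever rmm::mr::cuda_memory_resource is currently installed for the device, lets the pool size itself on demand, and records the previous resource so the destructor can restore it. Below is a minimal sketch of that pattern, assuming RMM's 2023-era rmm::mr API; PoolGuard is a hypothetical name, not code from this commit. Unlike the diff (where the inner "result" shadows the outer one and is destroyed at the end of its if block), the sketch keeps the pool alive for the guard's lifetime:

#include <rmm/cuda_device.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>

#include <memory>

// Hypothetical RAII guard mirroring the commit's constructor/destructor logic.
struct PoolGuard {
  explicit PoolGuard(int device) : id_{device}
  {
    // Store the current memory resource so it can be restored on teardown.
    previous_ = rmm::mr::get_per_device_resource(id_);

    // Only wrap the plain CUDA resource; leave any user-installed resource alone.
    auto* upstream = dynamic_cast<rmm::mr::cuda_memory_resource*>(previous_);
    if (upstream != nullptr) {
      // No explicit initial/maximum size: the pool grows on demand instead of
      // the removed hardcoded 1 GiB / 2 GiB bounds.
      pool_ = std::make_shared<
        rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource>>(upstream);
      rmm::mr::set_per_device_resource(id_, pool_.get());
    }
  }

  // Restore the old memory resource, as the FaissGpu destructor now does.
  ~PoolGuard() { rmm::mr::set_per_device_resource(id_, previous_); }

  rmm::cuda_device_id id_;
  rmm::mr::device_memory_resource* previous_{nullptr};
  std::shared_ptr<rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource>> pool_{};
};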
27 changes: 20 additions & 7 deletions cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h
@@ -109,11 +109,14 @@ class FaissGpu : public ANN<T> {
    RAFT_CUDA_TRY(cudaEventCreate(&sync_, cudaEventDisableTiming));
    faiss_default_stream_ = gpu_resource_.getDefaultStream(device_);
    raft::resource::set_cuda_stream(handle_, faiss_default_stream_);
    RAFT_LOG_INFO("device %d", device_);
    // store the current memory resource in case it is modified by the algorithm
    current_mr_ = rmm::mr::get_per_device_resource(rmm::cuda_device_id{device_});
  }

  virtual ~FaissGpu() noexcept
  {
    // restore the old memory resource
    rmm::mr::set_per_device_resource(rmm::cuda_device_id{device_}, current_mr_);
    RAFT_CUDA_TRY_NO_THROW(cudaEventDestroy(sync_));
  }
@@ -198,6 +201,8 @@ void FaissGpu<T>::build(const T* dataset, size_t nrow, cudaStream_t stream)
    index_ivf->cp.min_points_per_centroid = min_ppc;
  }
  index_->train(nrow, dataset);  // faiss::gpu::GpuIndexFlat::train() will do nothing
+  cudaDeviceSynchronize();
+  RAFT_LOG_INFO("faiss index trained");
  assert(index_->is_trained);
  index_->add(nrow, dataset);
  stream_wait(stream);
@@ -323,17 +328,25 @@ class FaissGpuIVFPQ : public FaissGpu<T> {
    config.device = this->device_;

    if (config.use_raft) {
-      rmm::mr::cuda_memory_resource cuda_mr;
-      // Construct a resource that uses a coalescing best-fit pool allocator
-      rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource> pool_mr{
-        &cuda_mr, pow(2, 30), pow(2, 31)};
-      rmm::mr::set_per_device_resource(rmm::cuda_device_id{this->device_}, &pool_mr);
+      auto result =
+        std::shared_ptr<rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource>>{nullptr};
+
+      auto* upstream = dynamic_cast<rmm::mr::cuda_memory_resource*>(
+        rmm::mr::get_per_device_resource(rmm::cuda_device_id(this->device_)));
+      if (upstream != nullptr) {
+        auto result =
+          std::make_shared<rmm::mr::pool_memory_resource<rmm::mr::cuda_memory_resource>>(upstream);
+        rmm::mr::set_per_device_resource(rmm::cuda_device_id(this->device_), result.get());
+      }
    }
+    cudaDeviceSynchronize();
+    RAFT_LOG_INFO("set to pool resource");

-    this->index_ = std::make_unique<faiss::gpu::GpuIndexIVFPQ>(&(this->gpu_resource_),
+    int subQuantizers = dim / param.M;
+    this->index_ = std::make_unique<faiss::gpu::GpuIndexIVFPQ>(&(this->gpu_resource_),
                                                               dim,
                                                               param.nlist,
-                                                               param.M,
+                                                               subQuantizers,
                                                               param.bitsPerCode,
                                                               this->metric_type_,
                                                               config);
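Alongside the allocator change, the GpuIndexIVFPQ constructor argument changes meaning: it now receives dim / param.M subquantizers rather than param.M itself, so M in the tuning YAMLs below counts dimensions per PQ subquantizer instead of the subquantizer count. A small worked example; the 128-dimensional dataset is an assumption (typical of million-scale benchmark sets), not something the commit fixes:

#include <cstdio>
#include <initializer_list>

int main()
{
  const int dim = 128;          // assumed dataset dimensionality
  for (int M : {4, 8, 16}) {    // the M sweep from the YAML configs below
    // The wrapper now passes dim / M subquantizers to GpuIndexIVFPQ.
    std::printf("M=%2d -> subQuantizers=%d\n", M, dim / M);
  }
  return 0;
}

At dim = 128 this yields 32, 16, and 8 subquantizers, which lines up with the pq_dim: [32, 16, 8] sweep added to the RAFT IVF-PQ config in the third file, so the two libraries are compared at matching PQ dimensions.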
Second changed file (a faiss GPU IVF-PQ benchmark tuning YAML; path not shown in the capture):
@@ -3,7 +3,7 @@ groups:
  base:
    build:
      nlist: [1024, 2048, 4096, 8192]
-      M: [8, 16]
+      M: [4, 8, 16]
      ratio: [10, 25]
      usePrecomputed: [False]
      useFloat16: [False]
@@ -12,16 +12,31 @@ groups:
    search:
      nprobe: [1, 5, 10, 50, 100, 200]
      refine_ratio: [1]
-  raft_enabled:
+  raft_disabled_million_scale:
    build:
-      nlist: [1024, 2048, 4096, 8192]
-      M: [8, 16]
-      ratio: [10, 25]
+      nlist: [4096]
+      M: [4, 8, 16]
+      ratio: [1]
+      usePrecomputed: [False, True]
+      useFloat16: [False, True]
+      bitsPerCode: [8]
+      interleavedLayout: [False]
+      use_raft: [False]
+    search:
+      nprobe: [1, 5, 10, 50, 100, 200]
+      k: [10, 100]
+      refine_ratio: [1]
+  raft_enabled_million_scale:
+    build:
+      nlist: [4096]
+      M: [4, 8, 16]
+      ratio: [1]
      usePrecomputed: [False]
-      useFloat16: [True, False]
+      useFloat16: [False, True]
      bitsPerCode: [8]
      interleavedLayout: [True]
      use_raft: [True]
    search:
      nprobe: [1, 5, 10, 50, 100, 200]
+      k: [10, 100]
      refine_ratio: [1]
Third changed file (a RAFT IVF-PQ benchmark tuning YAML; path not shown in the capture):
@@ -14,4 +14,16 @@ groups:
      nprobe: [1, 5, 10, 50, 100, 200]
      internalDistanceDtype: ["float"]
      smemLutDtype: ["float", "fp8", "half"]
-      refine_ratio: [1, 2, 4]
\ No newline at end of file
+      refine_ratio: [1, 2, 4]
+  cmp_faiss_million_scale:
+    build:
+      nlist: [4096]
+      pq_dim: [32, 16, 8]
+      pq_bits: [8]
+      ratio: [1]
+      niter: [25]
+    search:
+      nprobe: [1, 5, 10, 50, 100, 200]
+      k: [10, 100]
+      smemLutDtype: ["float", "half"]
+      refine_ratio: [1]
