From a3acb5d3decbe7245f07ac0f768cd6cc18e6e71f Mon Sep 17 00:00:00 2001 From: Tarang Jain Date: Thu, 30 Nov 2023 16:59:11 -0800 Subject: [PATCH] update faiss::gpu::benchmark main, revert pool MR in constructor --- .../ann/src/faiss/faiss_gpu_benchmark.cu | 17 ++++++---- cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h | 31 ++----------------- .../run/conf/algos/faiss_gpu_ivf_pq.yaml | 4 +-- 3 files changed, 14 insertions(+), 38 deletions(-) diff --git a/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu b/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu index 67ae2a27c1..ef3178ed76 100644 --- a/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu +++ b/cpp/bench/ann/src/faiss/faiss_gpu_benchmark.cu @@ -76,11 +76,6 @@ void parse_build_param(const nlohmann::json& conf, } else { param.bitsPerCode = 8; } - if (conf.contains("interleavedLayout")) { - param.interleavedLayout = conf.at("interleavedLayout"); - } else { - param.interleavedLayout = false; - } } template @@ -178,5 +173,15 @@ REGISTER_ALGO_INSTANCE(std::uint8_t); #ifdef ANN_BENCH_BUILD_MAIN #include "../common/benchmark.hpp" -int main(int argc, char** argv) { return raft::bench::ann::run_main(argc, argv); } +int main(int argc, char** argv) +{ + rmm::mr::cuda_memory_resource cuda_mr; + // Construct a resource that uses a coalescing best-fit pool allocator + rmm::mr::pool_memory_resource pool_mr{&cuda_mr}; + rmm::mr::set_current_device_resource( + &pool_mr); // Updates the current device resource pointer to `pool_mr` + rmm::mr::device_memory_resource* mr = + rmm::mr::get_current_device_resource(); // Points to `pool_mr` + return raft::bench::ann::run_main(argc, argv); +} #endif diff --git a/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h b/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h index 68ee223419..97c902c5da 100644 --- a/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h +++ b/cpp/bench/ann/src/faiss/faiss_gpu_wrapper.h @@ -109,17 +109,9 @@ class FaissGpu : public ANN { RAFT_CUDA_TRY(cudaEventCreate(&sync_, cudaEventDisableTiming)); faiss_default_stream_ = gpu_resource_.getDefaultStream(device_); raft::resource::set_cuda_stream(handle_, faiss_default_stream_); - RAFT_LOG_INFO("device %d", device_); - // store the current memory resource in case it is modified by the algorithm - current_mr_ = rmm::mr::get_per_device_resource(rmm::cuda_device_id{device_}); } - virtual ~FaissGpu() noexcept - { - // restore the old memory resource - rmm::mr::set_per_device_resource(rmm::cuda_device_id{device_}, current_mr_); - RAFT_CUDA_TRY_NO_THROW(cudaEventDestroy(sync_)); - } + virtual ~FaissGpu() noexcept { RAFT_CUDA_TRY_NO_THROW(cudaEventDestroy(sync_)); } void build(const T* dataset, size_t nrow, cudaStream_t stream = 0) final; @@ -171,7 +163,6 @@ class FaissGpu : public ANN { const T* dataset_; raft::device_resources handle_; float refine_ratio_ = 1.0; - rmm::mr::device_memory_resource* current_mr_{nullptr}; }; template @@ -201,8 +192,6 @@ void FaissGpu::build(const T* dataset, size_t nrow, cudaStream_t stream) index_ivf->cp.min_points_per_centroid = min_ppc; } index_->train(nrow, dataset); // faiss::gpu::GpuIndexFlat::train() will do nothing - cudaDeviceSynchronize(); - RAFT_LOG_INFO("faiss index trained"); assert(index_->is_trained); index_->add(nrow, dataset); stream_wait(stream); @@ -313,7 +302,6 @@ class FaissGpuIVFPQ : public FaissGpu { int M; bool useFloat16; bool usePrecomputed; - bool interleavedLayout; bool use_raft; int bitsPerCode; }; @@ -324,24 +312,9 @@ class FaissGpuIVFPQ : public FaissGpu { config.useFloat16LookupTables = param.useFloat16; config.usePrecomputedTables = param.usePrecomputed; config.use_raft = param.use_raft; - config.interleavedLayout = param.interleavedLayout; + config.interleavedLayout = param.use_raft; config.device = this->device_; - if (config.use_raft) { - auto result = - std::shared_ptr>{nullptr}; - - auto* upstream = dynamic_cast( - rmm::mr::get_per_device_resource(rmm::cuda_device_id(this->device_))); - if (upstream != nullptr) { - auto result = - std::make_shared>(upstream); - rmm::mr::set_per_device_resource(rmm::cuda_device_id(this->device_), result.get()); - } - } - cudaDeviceSynchronize(); - RAFT_LOG_INFO("set to pool resource"); - int subQuantizers = dim / param.M; this->index_ = std::make_unique(&(this->gpu_resource_), dim, diff --git a/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/faiss_gpu_ivf_pq.yaml b/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/faiss_gpu_ivf_pq.yaml index 7cbbd7be93..2a54936d04 100644 --- a/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/faiss_gpu_ivf_pq.yaml +++ b/python/raft-ann-bench/src/raft-ann-bench/run/conf/algos/faiss_gpu_ivf_pq.yaml @@ -20,7 +20,6 @@ groups: usePrecomputed: [False, True] useFloat16: [False, True] bitsPerCode: [8] - interleavedLayout: [False] use_raft: [False] search: nprobe: [1, 5, 10, 50, 100, 200] @@ -29,12 +28,11 @@ groups: raft_enabled_million_scale: build: nlist: [4096] - M: [4, 8, 16] + M: [16] ratio: [1] usePrecomputed: [False] useFloat16: [False, True] bitsPerCode: [8] - interleavedLayout: [True] use_raft: [True] search: nprobe: [1, 5, 10, 50, 100, 200]