Merge branch 'branch-23.10' into imp-google-benchmarks

rapidsai · Sep 8, 2023 · ef112d0 · ef112d0
2 parents 732b923 + e1c8566
commit ef112d0
Show file tree

Hide file tree

Showing 14 changed files with 158 additions and 105 deletions.
diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh
@@ -11,6 +11,6 @@ rapids-print-env
 
 rapids-logger "Begin cpp build"
 
-rapids-mamba-retry mambabuild conda/recipes/libraft
+rapids-conda-retry mambabuild conda/recipes/libraft
 
 rapids-upload-conda-to-s3 cpp
diff --git a/ci/build_python.sh b/ci/build_python.sh
@@ -15,19 +15,19 @@ CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)
 
 # TODO: Remove `--no-test` flags once importing on a CPU
 # node works correctly
-rapids-mamba-retry mambabuild \
+rapids-conda-retry mambabuild \
   --no-test \
   --channel "${CPP_CHANNEL}" \
   conda/recipes/pylibraft
 
-rapids-mamba-retry mambabuild \
+rapids-conda-retry mambabuild \
   --no-test \
   --channel "${CPP_CHANNEL}" \
   --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
   conda/recipes/raft-dask
 
 # Build ann-bench for each cuda and python version
-rapids-mamba-retry mambabuild \
+rapids-conda-retry mambabuild \
 --no-test \
 --channel "${CPP_CHANNEL}" \
 --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
@@ -37,7 +37,7 @@ conda/recipes/raft-ann-bench
 # version
 RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}"
 if [[ ${RAPIDS_CUDA_MAJOR} == "11" ]]; then
-  rapids-mamba-retry mambabuild \
+  rapids-conda-retry mambabuild \
   --no-test \
   --channel "${CPP_CHANNEL}" \
   --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \

diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh
@@ -5,6 +5,7 @@ set -euo pipefail
 
 package_name=$1
 package_dir=$2
+underscore_package_name=$(echo "${package_name}" | tr "-" "_")
 
 source rapids-configure-sccache
 source rapids-date-string
@@ -15,9 +16,36 @@ version_override="$(rapids-pip-wheel-version ${RAPIDS_DATE_STRING})"
 
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 
-ci/release/apply_wheel_modifications.sh ${version_override} "-${RAPIDS_PY_CUDA_SUFFIX}"
-echo "The package name and/or version was modified in the package source. The git diff is:"
-git diff
+# This is the version of the suffix with a preceding hyphen. It's used
+# everywhere except in the final wheel name.
+PACKAGE_CUDA_SUFFIX="-${RAPIDS_PY_CUDA_SUFFIX}"
+
+# Patch project metadata files to include the CUDA version suffix and version override.
+pyproject_file="${package_dir}/pyproject.toml"
+
+sed -i "s/^version = .*/version = \"${version_override}\"/g" ${pyproject_file}
+sed -i "s/name = \"${package_name}\"/name = \"${package_name}${PACKAGE_CUDA_SUFFIX}\"/g" ${pyproject_file}
+
+# For nightlies we want to ensure that we're pulling in alphas as well. The
+# easiest way to do so is to augment the spec with a constraint containing a
+# min alpha version that doesn't affect the version bounds but does allow usage
+# of alpha versions for that dependency without --pre
+alpha_spec=''
+if ! rapids-is-release-build; then
+    alpha_spec=',>=0.0.0a0'
+fi
+
+if [[ ${package_name} == "raft-dask" ]]; then
+    sed -r -i "s/pylibraft==(.*)\"/pylibraft${PACKAGE_CUDA_SUFFIX}==\1${alpha_spec}\"/g" ${pyproject_file}
+    sed -i "s/ucx-py/ucx-py${PACKAGE_CUDA_SUFFIX}/g" python/raft-dask/pyproject.toml
+else
+    sed -r -i "s/rmm(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" ${pyproject_file}
+fi
+
+if [[ $PACKAGE_CUDA_SUFFIX == "-cu12" ]]; then
+    sed -i "s/cuda-python[<=>\.,0-9a]*/cuda-python>=12.0,<13.0a0/g" ${pyproject_file}
+    sed -i "s/cupy-cuda11x/cupy-cuda12x/g" ${pyproject_file}
+fi
 
 cd "${package_dir}"
 
@@ -27,4 +55,4 @@ python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check
 mkdir -p final_dist
 python -m auditwheel repair -w final_dist dist/*
 
-RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 final_dist
+RAPIDS_PY_WHEEL_NAME="${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 final_dist
diff --git a/ci/build_wheel_raft_dask.sh b/ci/build_wheel_raft_dask.sh
@@ -6,9 +6,4 @@ set -euo pipefail
 # Set up skbuild options. Enable sccache in skbuild config options
 export SKBUILD_CONFIGURE_OPTIONS="-DRAFT_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF"
 
-RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
-
-RAPIDS_PY_WHEEL_NAME=pylibraft_${RAPIDS_PY_CUDA_SUFFIX} rapids-download-wheels-from-s3 ./local-pylibraft
-python -m pip install --no-deps ./local-pylibraft/pylibraft*.whl
-
-ci/build_wheel.sh raft_dask python/raft-dask
+ci/build_wheel.sh raft-dask python/raft-dask
diff --git a/ci/release/apply_wheel_modifications.sh b/ci/release/apply_wheel_modifications.sh
diff --git a/conda/recipes/raft-ann-bench-cpu/meta.yaml b/conda/recipes/raft-ann-bench-cpu/meta.yaml
@@ -57,6 +57,7 @@ requirements:
     - matplotlib
     - python
     - pyyaml
+    - benchmark
 
 about:
   home: https://rapids.ai/

diff --git a/conda/recipes/raft-ann-bench/meta.yaml b/conda/recipes/raft-ann-bench/meta.yaml
@@ -90,7 +90,11 @@ requirements:
     - libfaiss {{ faiss_version }}
     {% endif %}
     - h5py {{ h5py_version }}
-
+    - benchmark
+    - glog {{ glog_version }}
+    - matplotlib
+    - python
+    - pyyaml
 about:
   home: https://rapids.ai/
   license: Apache-2.0

diff --git a/cpp/bench/ann/src/faiss/faiss_benchmark.cu b/cpp/bench/ann/src/faiss/faiss_benchmark.cu
@@ -30,19 +30,27 @@
 
 namespace raft::bench::ann {
 
+template <typename T>
+void parse_base_build_param(const nlohmann::json& conf,
+                            typename raft::bench::ann::FaissGpu<T>::BuildParam& param)
+{
+  param.nlist = conf.at("nlist");
+  if (conf.contains("ratio")) { param.ratio = conf.at("ratio"); }
+}
+
 template <typename T>
 void parse_build_param(const nlohmann::json& conf,
                        typename raft::bench::ann::FaissGpuIVFFlat<T>::BuildParam& param)
 {
-  param.nlist = conf.at("nlist");
+  parse_base_build_param<T>(conf, param);
 }
 
 template <typename T>
 void parse_build_param(const nlohmann::json& conf,
                        typename raft::bench::ann::FaissGpuIVFPQ<T>::BuildParam& param)
 {
-  param.nlist = conf.at("nlist");
-  param.M     = conf.at("M");
+  parse_base_build_param<T>(conf, param);
+  param.M = conf.at("M");
   if (conf.contains("usePrecomputed")) {
     param.usePrecomputed = conf.at("usePrecomputed");
   } else {
@@ -59,7 +67,7 @@ template <typename T>
 void parse_build_param(const nlohmann::json& conf,
                        typename raft::bench::ann::FaissGpuIVFSQ<T>::BuildParam& param)
 {
-  param.nlist          = conf.at("nlist");
+  parse_base_build_param<T>(conf, param);
   param.quantizer_type = conf.at("quantizer_type");
 }
 

diff --git a/cpp/bench/ann/src/faiss/faiss_wrapper.h b/cpp/bench/ann/src/faiss/faiss_wrapper.h
@@ -18,6 +18,7 @@
 
 #include "../common/ann_types.hpp"
 
+#include <raft/core/logger.hpp>
 #include <raft/util/cudart_utils.hpp>
 
 #include <faiss/IndexFlat.h>
@@ -85,7 +86,23 @@ class FaissGpu : public ANN<T> {
     float refine_ratio = 1.0;
   };
 
-  FaissGpu(Metric metric, int dim, int nlist);
+  struct BuildParam {
+    int nlist = 1;
+    int ratio = 2;
+  };
+
+  FaissGpu(Metric metric, int dim, const BuildParam& param)
+    : ANN<T>(metric, dim),
+      metric_type_(parse_metric_type(metric)),
+      nlist_{param.nlist},
+      training_sample_fraction_{1.0 / double(param.ratio)}
+  {
+    static_assert(std::is_same_v<T, float>, "faiss support only float type");
+    RAFT_CUDA_TRY(cudaGetDevice(&device_));
+    RAFT_CUDA_TRY(cudaEventCreate(&sync_, cudaEventDisableTiming));
+    faiss_default_stream_ = gpu_resource_.getDefaultStream(device_);
+  }
+
   virtual ~FaissGpu() noexcept { RAFT_CUDA_TRY_NO_THROW(cudaEventDestroy(sync_)); }
 
   void build(const T* dataset, size_t nrow, cudaStream_t stream = 0) final;
@@ -131,23 +148,35 @@ class FaissGpu : public ANN<T> {
   int device_;
   cudaEvent_t sync_{nullptr};
   cudaStream_t faiss_default_stream_{nullptr};
+  double training_sample_fraction_;
 };
 
-template <typename T>
-FaissGpu<T>::FaissGpu(Metric metric, int dim, int nlist)
-  : ANN<T>(metric, dim), metric_type_(parse_metric_type(metric)), nlist_(nlist)
-{
-  static_assert(std::is_same_v<T, float>, "faiss support only float type");
-  RAFT_CUDA_TRY(cudaGetDevice(&device_));
-  RAFT_CUDA_TRY(cudaEventCreate(&sync_, cudaEventDisableTiming));
-  faiss_default_stream_ = gpu_resource_.getDefaultStream(device_);
-}
-
 template <typename T>
 void FaissGpu<T>::build(const T* dataset, size_t nrow, cudaStream_t stream)
 {
   OmpSingleThreadScope omp_single_thread;
-
+  auto index_ivf = dynamic_cast<faiss::gpu::GpuIndexIVF*>(index_.get());
+  if (index_ivf != nullptr) {
+    // set the min/max training size for clustering to use the whole provided training set.
+    double trainset_size       = training_sample_fraction_ * static_cast<double>(nrow);
+    double points_per_centroid = trainset_size / static_cast<double>(nlist_);
+    int max_ppc                = std::ceil(points_per_centroid);
+    int min_ppc                = std::floor(points_per_centroid);
+    if (min_ppc < index_ivf->cp.min_points_per_centroid) {
+      RAFT_LOG_WARN(
+        "The suggested training set size %zu (data size %zu, training sample ratio %f) yields %d "
+        "points per cluster (n_lists = %d). This is smaller than the FAISS default "
+        "min_points_per_centroid = %d.",
+        static_cast<size_t>(trainset_size),
+        nrow,
+        training_sample_fraction_,
+        min_ppc,
+        nlist_,
+        index_ivf->cp.min_points_per_centroid);
+    }
+    index_ivf->cp.max_points_per_centroid = max_ppc;
+    index_ivf->cp.min_points_per_centroid = min_ppc;
+  }
   index_->train(nrow, dataset);  // faiss::gpu::GpuIndexFlat::train() will do nothing
   assert(index_->is_trained);
   index_->add(nrow, dataset);
@@ -208,12 +237,9 @@ void FaissGpu<T>::load_(const std::string& file)
 template <typename T>
 class FaissGpuIVFFlat : public FaissGpu<T> {
  public:
-  struct BuildParam {
-    int nlist;
-  };
+  using typename FaissGpu<T>::BuildParam;
 
-  FaissGpuIVFFlat(Metric metric, int dim, const BuildParam& param)
-    : FaissGpu<T>(metric, dim, param.nlist)
+  FaissGpuIVFFlat(Metric metric, int dim, const BuildParam& param) : FaissGpu<T>(metric, dim, param)
   {
     faiss::gpu::GpuIndexIVFFlatConfig config;
     config.device = this->device_;
@@ -234,15 +260,13 @@ class FaissGpuIVFFlat : public FaissGpu<T> {
 template <typename T>
 class FaissGpuIVFPQ : public FaissGpu<T> {
  public:
-  struct BuildParam {
-    int nlist;
+  struct BuildParam : public FaissGpu<T>::BuildParam {
     int M;
     bool useFloat16;
     bool usePrecomputed;
   };
 
-  FaissGpuIVFPQ(Metric metric, int dim, const BuildParam& param)
-    : FaissGpu<T>(metric, dim, param.nlist)
+  FaissGpuIVFPQ(Metric metric, int dim, const BuildParam& param) : FaissGpu<T>(metric, dim, param)
   {
     faiss::gpu::GpuIndexIVFPQConfig config;
     config.useFloat16LookupTables = param.useFloat16;
@@ -271,13 +295,11 @@ class FaissGpuIVFPQ : public FaissGpu<T> {
 template <typename T>
 class FaissGpuIVFSQ : public FaissGpu<T> {
  public:
-  struct BuildParam {
-    int nlist;
+  struct BuildParam : public FaissGpu<T>::BuildParam {
     std::string quantizer_type;
   };
 
-  FaissGpuIVFSQ(Metric metric, int dim, const BuildParam& param)
-    : FaissGpu<T>(metric, dim, param.nlist)
+  FaissGpuIVFSQ(Metric metric, int dim, const BuildParam& param) : FaissGpu<T>(metric, dim, param)
   {
     faiss::ScalarQuantizer::QuantizerType qtype;
     if (param.quantizer_type == "fp16") {
@@ -310,7 +332,8 @@ class FaissGpuIVFSQ : public FaissGpu<T> {
 template <typename T>
 class FaissGpuFlat : public FaissGpu<T> {
  public:
-  FaissGpuFlat(Metric metric, int dim) : FaissGpu<T>(metric, dim, 0)
+  FaissGpuFlat(Metric metric, int dim)
+    : FaissGpu<T>(metric, dim, typename FaissGpu<T>::BuildParam{})
   {
     faiss::gpu::GpuIndexFlatConfig config;
     config.device = this->device_;

diff --git a/cpp/bench/ann/src/raft/raft_benchmark.cu b/cpp/bench/ann/src/raft/raft_benchmark.cu
@@ -58,10 +58,7 @@ void parse_build_param(const nlohmann::json& conf,
 {
   param.n_lists = conf.at("nlist");
   if (conf.contains("niter")) { param.kmeans_n_iters = conf.at("niter"); }
-  if (conf.contains("ratio")) {
-    param.kmeans_trainset_fraction = 1.0 / (double)conf.at("ratio");
-    std::cout << "kmeans_trainset_fraction " << param.kmeans_trainset_fraction;
-  }
+  if (conf.contains("ratio")) { param.kmeans_trainset_fraction = 1.0 / (double)conf.at("ratio"); }
 }
 
 template <typename T, typename IdxT>
@@ -82,6 +79,17 @@ void parse_build_param(const nlohmann::json& conf,
   if (conf.contains("ratio")) { param.kmeans_trainset_fraction = 1.0 / (double)conf.at("ratio"); }
   if (conf.contains("pq_bits")) { param.pq_bits = conf.at("pq_bits"); }
   if (conf.contains("pq_dim")) { param.pq_dim = conf.at("pq_dim"); }
+  if (conf.contains("codebook_kind")) {
+    std::string kind = conf.at("codebook_kind");
+    if (kind == "cluster") {
+      param.codebook_kind = raft::neighbors::ivf_pq::codebook_gen::PER_CLUSTER;
+    } else if (kind == "subspace") {
+      param.codebook_kind = raft::neighbors::ivf_pq::codebook_gen::PER_SUBSPACE;
+    } else {
+      throw std::runtime_error("codebook_kind: '" + kind +
+                               "', should be either 'cluster' or 'subspace'");
+    }
+  }
 }
 
 template <typename T, typename IdxT>
-Original file line number
+Diff line change
@@ Expand Up / @@ -57,6 +57,7 @@ requirements: @@
         - matplotlib
         - python
         - pyyaml
+        - benchmark
     about:
       home: https://rapids.ai/
@@ Expand Down @@