Skip to content

Commit

Permalink
Merge branch 'branch-23.10' into imp-google-benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
cjnolet authored Sep 8, 2023
2 parents 732b923 + e1c8566 commit ef112d0
Show file tree
Hide file tree
Showing 14 changed files with 158 additions and 105 deletions.
2 changes: 1 addition & 1 deletion ci/build_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ rapids-print-env

rapids-logger "Begin cpp build"

rapids-mamba-retry mambabuild conda/recipes/libraft
rapids-conda-retry mambabuild conda/recipes/libraft

rapids-upload-conda-to-s3 cpp
8 changes: 4 additions & 4 deletions ci/build_python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,19 @@ CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)

# TODO: Remove `--no-test` flags once importing on a CPU
# node works correctly
rapids-mamba-retry mambabuild \
rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
conda/recipes/pylibraft

rapids-mamba-retry mambabuild \
rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
--channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
conda/recipes/raft-dask

# Build ann-bench for each cuda and python version
rapids-mamba-retry mambabuild \
rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
--channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
Expand All @@ -37,7 +37,7 @@ conda/recipes/raft-ann-bench
# version
RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}"
if [[ ${RAPIDS_CUDA_MAJOR} == "11" ]]; then
rapids-mamba-retry mambabuild \
rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
--channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
Expand Down
36 changes: 32 additions & 4 deletions ci/build_wheel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ set -euo pipefail

package_name=$1
package_dir=$2
underscore_package_name=$(echo "${package_name}" | tr "-" "_")

source rapids-configure-sccache
source rapids-date-string
Expand All @@ -15,9 +16,36 @@ version_override="$(rapids-pip-wheel-version ${RAPIDS_DATE_STRING})"

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"

ci/release/apply_wheel_modifications.sh ${version_override} "-${RAPIDS_PY_CUDA_SUFFIX}"
echo "The package name and/or version was modified in the package source. The git diff is:"
git diff
# This is the version of the suffix with a preceding hyphen. It's used
# everywhere except in the final wheel name.
PACKAGE_CUDA_SUFFIX="-${RAPIDS_PY_CUDA_SUFFIX}"

# Patch project metadata files to include the CUDA version suffix and version override.
# Every sed call below edits this single file. The path is always quoted so that
# a package directory containing whitespace or glob characters is not word-split.
pyproject_file="${package_dir}/pyproject.toml"

sed -i "s/^version = .*/version = \"${version_override}\"/g" "${pyproject_file}"
sed -i "s/name = \"${package_name}\"/name = \"${package_name}${PACKAGE_CUDA_SUFFIX}\"/g" "${pyproject_file}"

# For nightlies we want to ensure that we're pulling in alphas as well. The
# easiest way to do so is to augment the spec with a constraint containing a
# min alpha version that doesn't affect the version bounds but does allow usage
# of alpha versions for that dependency without --pre
alpha_spec=''
if ! rapids-is-release-build; then
    alpha_spec=',>=0.0.0a0'
fi

# Rename the RAPIDS dependencies so they carry the same CUDA suffix as this package.
if [[ ${package_name} == "raft-dask" ]]; then
    sed -r -i "s/pylibraft==(.*)\"/pylibraft${PACKAGE_CUDA_SUFFIX}==\1${alpha_spec}\"/g" "${pyproject_file}"
    # Use ${pyproject_file} rather than a hard-coded python/raft-dask path so the
    # edit follows whatever package_dir the caller passed in.
    sed -i "s/ucx-py/ucx-py${PACKAGE_CUDA_SUFFIX}/g" "${pyproject_file}"
else
    sed -r -i "s/rmm(.*)\"/rmm${PACKAGE_CUDA_SUFFIX}\1${alpha_spec}\"/g" "${pyproject_file}"
fi

# CUDA 12 wheels need the matching cuda-python range and cupy build.
if [[ ${PACKAGE_CUDA_SUFFIX} == "-cu12" ]]; then
    sed -i "s/cuda-python[<=>\.,0-9a]*/cuda-python>=12.0,<13.0a0/g" "${pyproject_file}"
    sed -i "s/cupy-cuda11x/cupy-cuda12x/g" "${pyproject_file}"
fi

cd "${package_dir}"

Expand All @@ -27,4 +55,4 @@ python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check
mkdir -p final_dist
python -m auditwheel repair -w final_dist dist/*

RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 final_dist
RAPIDS_PY_WHEEL_NAME="${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 final_dist
7 changes: 1 addition & 6 deletions ci/build_wheel_raft_dask.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,4 @@ set -euo pipefail
# Set up skbuild options. Enable sccache in skbuild config options
export SKBUILD_CONFIGURE_OPTIONS="-DRAFT_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF"

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"

RAPIDS_PY_WHEEL_NAME=pylibraft_${RAPIDS_PY_CUDA_SUFFIX} rapids-download-wheels-from-s3 ./local-pylibraft
python -m pip install --no-deps ./local-pylibraft/pylibraft*.whl

ci/build_wheel.sh raft_dask python/raft-dask
ci/build_wheel.sh raft-dask python/raft-dask
25 changes: 0 additions & 25 deletions ci/release/apply_wheel_modifications.sh

This file was deleted.

1 change: 1 addition & 0 deletions conda/recipes/raft-ann-bench-cpu/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ requirements:
- matplotlib
- python
- pyyaml
- benchmark

about:
home: https://rapids.ai/
Expand Down
6 changes: 5 additions & 1 deletion conda/recipes/raft-ann-bench/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,11 @@ requirements:
- libfaiss {{ faiss_version }}
{% endif %}
- h5py {{ h5py_version }}

- benchmark
- glog {{ glog_version }}
- matplotlib
- python
- pyyaml
about:
home: https://rapids.ai/
license: Apache-2.0
Expand Down
16 changes: 12 additions & 4 deletions cpp/bench/ann/src/faiss/faiss_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -30,19 +30,27 @@

namespace raft::bench::ann {

/**
 * Read the build parameters common to all FAISS GPU index types from `conf`.
 *
 * @param conf  JSON object holding the build configuration
 * @param param receives `nlist` (required key) and `ratio` (optional key)
 */
template <typename T>
void parse_base_build_param(const nlohmann::json& conf,
                            typename raft::bench::ann::FaissGpu<T>::BuildParam& param)
{
  // "nlist" is mandatory: json::at() throws when the key is absent.
  param.nlist = conf.at("nlist");

  // "ratio" is optional; when it is not given, `param.ratio` is left untouched.
  if (!conf.contains("ratio")) { return; }
  param.ratio = conf.at("ratio");
}

template <typename T>
void parse_build_param(const nlohmann::json& conf,
typename raft::bench::ann::FaissGpuIVFFlat<T>::BuildParam& param)
{
param.nlist = conf.at("nlist");
parse_base_build_param<T>(conf, param);
}

template <typename T>
void parse_build_param(const nlohmann::json& conf,
typename raft::bench::ann::FaissGpuIVFPQ<T>::BuildParam& param)
{
param.nlist = conf.at("nlist");
param.M = conf.at("M");
parse_base_build_param<T>(conf, param);
param.M = conf.at("M");
if (conf.contains("usePrecomputed")) {
param.usePrecomputed = conf.at("usePrecomputed");
} else {
Expand All @@ -59,7 +67,7 @@ template <typename T>
void parse_build_param(const nlohmann::json& conf,
typename raft::bench::ann::FaissGpuIVFSQ<T>::BuildParam& param)
{
param.nlist = conf.at("nlist");
parse_base_build_param<T>(conf, param);
param.quantizer_type = conf.at("quantizer_type");
}

Expand Down
75 changes: 49 additions & 26 deletions cpp/bench/ann/src/faiss/faiss_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include "../common/ann_types.hpp"

#include <raft/core/logger.hpp>
#include <raft/util/cudart_utils.hpp>

#include <faiss/IndexFlat.h>
Expand Down Expand Up @@ -85,7 +86,23 @@ class FaissGpu : public ANN<T> {
float refine_ratio = 1.0;
};

FaissGpu(Metric metric, int dim, int nlist);
/** Build-time parameters consumed by the FaissGpu constructor. */
struct BuildParam {
  /** Stored by the FaissGpu constructor as `nlist_`. */
  int nlist{1};
  /** Divisor of the training sample fraction: the constructor stores 1.0 / ratio. */
  int ratio{2};
};

/**
 * Initialise the state shared by every FAISS GPU index wrapper.
 *
 * @param metric forwarded to the ANN<T> base and converted with parse_metric_type()
 * @param dim    forwarded to the ANN<T> base
 * @param param  supplies `nlist` and `ratio`; the training sample fraction is 1 / ratio
 *
 * NOTE(review): `param.ratio` is not validated here; a value of 0 would make the
 * initializer below divide by zero. Confirm callers never pass it.
 */
FaissGpu(Metric metric, int dim, const BuildParam& param)
  : ANN<T>(metric, dim),
    metric_type_(parse_metric_type(metric)),
    nlist_(param.nlist),
    training_sample_fraction_(1.0 / static_cast<double>(param.ratio))
{
  static_assert(std::is_same_v<T, float>, "faiss support only float type");

  // Record the current device first: the stream lookup below is keyed on device_.
  RAFT_CUDA_TRY(cudaGetDevice(&device_));
  // The event is created with timing disabled.
  RAFT_CUDA_TRY(cudaEventCreate(&sync_, cudaEventDisableTiming));

  faiss_default_stream_ = gpu_resource_.getDefaultStream(device_);
}

// Destroys the CUDA event held in sync_. NOTE(review): the *_NO_THROW macro
// variant presumably reports a failure without throwing, in keeping with the
// noexcept destructor — confirm against the RAFT macro definition.
virtual ~FaissGpu() noexcept { RAFT_CUDA_TRY_NO_THROW(cudaEventDestroy(sync_)); }

void build(const T* dataset, size_t nrow, cudaStream_t stream = 0) final;
Expand Down Expand Up @@ -131,23 +148,35 @@ class FaissGpu : public ANN<T> {
int device_;
cudaEvent_t sync_{nullptr};
cudaStream_t faiss_default_stream_{nullptr};
double training_sample_fraction_;
};

template <typename T>
FaissGpu<T>::FaissGpu(Metric metric, int dim, int nlist)
: ANN<T>(metric, dim), metric_type_(parse_metric_type(metric)), nlist_(nlist)
{
static_assert(std::is_same_v<T, float>, "faiss support only float type");
RAFT_CUDA_TRY(cudaGetDevice(&device_));
RAFT_CUDA_TRY(cudaEventCreate(&sync_, cudaEventDisableTiming));
faiss_default_stream_ = gpu_resource_.getDefaultStream(device_);
}

template <typename T>
void FaissGpu<T>::build(const T* dataset, size_t nrow, cudaStream_t stream)
{
OmpSingleThreadScope omp_single_thread;

auto index_ivf = dynamic_cast<faiss::gpu::GpuIndexIVF*>(index_.get());
if (index_ivf != nullptr) {
// set the min/max training size for clustering to use the whole provided training set.
double trainset_size = training_sample_fraction_ * static_cast<double>(nrow);
double points_per_centroid = trainset_size / static_cast<double>(nlist_);
int max_ppc = std::ceil(points_per_centroid);
int min_ppc = std::floor(points_per_centroid);
if (min_ppc < index_ivf->cp.min_points_per_centroid) {
RAFT_LOG_WARN(
"The suggested training set size %zu (data size %zu, training sample ratio %f) yields %d "
"points per cluster (n_lists = %d). This is smaller than the FAISS default "
"min_points_per_centroid = %d.",
static_cast<size_t>(trainset_size),
nrow,
training_sample_fraction_,
min_ppc,
nlist_,
index_ivf->cp.min_points_per_centroid);
}
index_ivf->cp.max_points_per_centroid = max_ppc;
index_ivf->cp.min_points_per_centroid = min_ppc;
}
index_->train(nrow, dataset); // faiss::gpu::GpuIndexFlat::train() will do nothing
assert(index_->is_trained);
index_->add(nrow, dataset);
Expand Down Expand Up @@ -208,12 +237,9 @@ void FaissGpu<T>::load_(const std::string& file)
template <typename T>
class FaissGpuIVFFlat : public FaissGpu<T> {
public:
struct BuildParam {
int nlist;
};
using typename FaissGpu<T>::BuildParam;

FaissGpuIVFFlat(Metric metric, int dim, const BuildParam& param)
: FaissGpu<T>(metric, dim, param.nlist)
FaissGpuIVFFlat(Metric metric, int dim, const BuildParam& param) : FaissGpu<T>(metric, dim, param)
{
faiss::gpu::GpuIndexIVFFlatConfig config;
config.device = this->device_;
Expand All @@ -234,15 +260,13 @@ class FaissGpuIVFFlat : public FaissGpu<T> {
template <typename T>
class FaissGpuIVFPQ : public FaissGpu<T> {
public:
struct BuildParam {
int nlist;
struct BuildParam : public FaissGpu<T>::BuildParam {
int M;
bool useFloat16;
bool usePrecomputed;
};

FaissGpuIVFPQ(Metric metric, int dim, const BuildParam& param)
: FaissGpu<T>(metric, dim, param.nlist)
FaissGpuIVFPQ(Metric metric, int dim, const BuildParam& param) : FaissGpu<T>(metric, dim, param)
{
faiss::gpu::GpuIndexIVFPQConfig config;
config.useFloat16LookupTables = param.useFloat16;
Expand Down Expand Up @@ -271,13 +295,11 @@ class FaissGpuIVFPQ : public FaissGpu<T> {
template <typename T>
class FaissGpuIVFSQ : public FaissGpu<T> {
public:
struct BuildParam {
int nlist;
struct BuildParam : public FaissGpu<T>::BuildParam {
std::string quantizer_type;
};

FaissGpuIVFSQ(Metric metric, int dim, const BuildParam& param)
: FaissGpu<T>(metric, dim, param.nlist)
FaissGpuIVFSQ(Metric metric, int dim, const BuildParam& param) : FaissGpu<T>(metric, dim, param)
{
faiss::ScalarQuantizer::QuantizerType qtype;
if (param.quantizer_type == "fp16") {
Expand Down Expand Up @@ -310,7 +332,8 @@ class FaissGpuIVFSQ : public FaissGpu<T> {
template <typename T>
class FaissGpuFlat : public FaissGpu<T> {
public:
FaissGpuFlat(Metric metric, int dim) : FaissGpu<T>(metric, dim, 0)
FaissGpuFlat(Metric metric, int dim)
: FaissGpu<T>(metric, dim, typename FaissGpu<T>::BuildParam{})
{
faiss::gpu::GpuIndexFlatConfig config;
config.device = this->device_;
Expand Down
16 changes: 12 additions & 4 deletions cpp/bench/ann/src/raft/raft_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,7 @@ void parse_build_param(const nlohmann::json& conf,
{
param.n_lists = conf.at("nlist");
if (conf.contains("niter")) { param.kmeans_n_iters = conf.at("niter"); }
if (conf.contains("ratio")) {
param.kmeans_trainset_fraction = 1.0 / (double)conf.at("ratio");
std::cout << "kmeans_trainset_fraction " << param.kmeans_trainset_fraction;
}
if (conf.contains("ratio")) { param.kmeans_trainset_fraction = 1.0 / (double)conf.at("ratio"); }
}

template <typename T, typename IdxT>
Expand All @@ -82,6 +79,17 @@ void parse_build_param(const nlohmann::json& conf,
if (conf.contains("ratio")) { param.kmeans_trainset_fraction = 1.0 / (double)conf.at("ratio"); }
if (conf.contains("pq_bits")) { param.pq_bits = conf.at("pq_bits"); }
if (conf.contains("pq_dim")) { param.pq_dim = conf.at("pq_dim"); }
if (conf.contains("codebook_kind")) {
std::string kind = conf.at("codebook_kind");
if (kind == "cluster") {
param.codebook_kind = raft::neighbors::ivf_pq::codebook_gen::PER_CLUSTER;
} else if (kind == "subspace") {
param.codebook_kind = raft::neighbors::ivf_pq::codebook_gen::PER_SUBSPACE;
} else {
throw std::runtime_error("codebook_kind: '" + kind +
"', should be either 'cluster' or 'subspace'");
}
}
}

template <typename T, typename IdxT>
Expand Down
Loading

0 comments on commit ef112d0

Please sign in to comment.